├── .github └── workflows │ ├── CI.yaml │ └── docs.yaml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── ci.sh ├── datafusion-optd-cli ├── Cargo.toml ├── Dockerfile ├── LICENSE.txt ├── README.md ├── examples │ └── cli-session-context.rs ├── src │ ├── catalog.rs │ ├── cli_context.rs │ ├── command.rs │ ├── exec.rs │ ├── functions.rs │ ├── helper.rs │ ├── highlighter.rs │ ├── lib.rs │ ├── main.rs │ ├── object_storage.rs │ ├── pool_type.rs │ ├── print_format.rs │ └── print_options.rs ├── tests │ ├── cli_integration.rs │ └── data │ │ └── sql.txt └── tpch-sf0_01 │ ├── LICENSE │ ├── README.md │ ├── customer.csv │ ├── lineitem.csv │ ├── nation.csv │ ├── orders.csv │ ├── part.csv │ ├── partsupp.csv │ ├── populate.sql │ ├── region.csv │ ├── simple_manual_test.sql │ ├── supplier.csv │ ├── tbl_to_csv.py │ └── test.sql ├── dev_scripts └── which_queries_work.sh ├── docs ├── .gitignore ├── README.md ├── book.toml ├── custom.css └── src │ ├── SUMMARY.md │ ├── cost_model.md │ ├── cost_model_benchmarking.md │ ├── datafusion.md │ ├── datafusion_cli.md │ ├── demo_three_join.md │ ├── demo_tpch_q8.md │ ├── miscellaneous.md │ ├── optd-cascades │ ├── optd-cascades-1.svg │ ├── optd-cascades-2.svg │ ├── optd-cascades-3.svg │ ├── optd-cascades-4.svg │ ├── optd-datafusion-overview.svg │ ├── optd-plan-repr-1.svg │ ├── optd-plan-repr-2.svg │ ├── optd-reopt-architecture.svg │ ├── optd-reopt-plan.svg │ ├── optd-rule-1.svg │ └── optd-rule-2.svg │ ├── optimizer.md │ ├── partial_exploration.md │ ├── plan_repr.md │ ├── properties.md │ ├── reoptimization.md │ ├── rule_engine.md │ └── sqlplannertest.md ├── optd-adaptive-demo ├── Cargo.toml └── src │ └── bin │ ├── optd-adaptive-three-join.rs │ └── optd-adaptive-tpch-q8.rs ├── optd-core ├── Cargo.toml └── src │ ├── cascades.rs │ ├── cascades │ ├── memo.rs │ ├── optimizer.rs │ ├── rule_match.rs │ └── tasks2.rs │ ├── cost.rs │ ├── heuristics.rs │ ├── heuristics │ └── optimizer.rs │ ├── lib.rs │ ├── logical_property.rs │ ├── 
nodes.rs │ ├── optimizer.rs │ ├── physical_property.rs │ ├── rules.rs │ ├── rules │ └── ir.rs │ ├── tests.rs │ └── tests │ ├── common.rs │ └── heuristics_physical_property.rs ├── optd-datafusion-bridge ├── Cargo.toml └── src │ ├── from_optd.rs │ ├── into_optd.rs │ ├── lib.rs │ └── physical_collector.rs ├── optd-datafusion-repr-adv-cost ├── Cargo.toml └── src │ ├── adv_stats.rs │ ├── adv_stats │ ├── agg.rs │ ├── filter.rs │ ├── filter │ │ ├── in_list.rs │ │ └── like.rs │ ├── join.rs │ ├── limit.rs │ └── stats.rs │ └── lib.rs ├── optd-datafusion-repr ├── Cargo.toml └── src │ ├── cost.rs │ ├── cost │ ├── adaptive_cost.rs │ └── base_cost.rs │ ├── explain.rs │ ├── lib.rs │ ├── memo_ext.rs │ ├── optimizer_ext.rs │ ├── plan_nodes.rs │ ├── plan_nodes │ ├── agg.rs │ ├── empty_relation.rs │ ├── filter.rs │ ├── join.rs │ ├── limit.rs │ ├── macros.rs │ ├── predicates.rs │ ├── predicates │ │ ├── between_pred.rs │ │ ├── bin_op_pred.rs │ │ ├── cast_pred.rs │ │ ├── column_ref_pred.rs │ │ ├── constant_pred.rs │ │ ├── data_type_pred.rs │ │ ├── extern_column_ref_pred.rs │ │ ├── func_pred.rs │ │ ├── in_list_pred.rs │ │ ├── like_pred.rs │ │ ├── list_pred.rs │ │ ├── log_op_pred.rs │ │ ├── sort_order_pred.rs │ │ └── un_op_pred.rs │ ├── projection.rs │ ├── scan.rs │ ├── sort.rs │ └── subquery.rs │ ├── properties.rs │ ├── properties │ ├── column_ref.rs │ └── schema.rs │ ├── rules.rs │ ├── rules │ ├── eliminate_duplicated_expr.rs │ ├── eliminate_limit.rs │ ├── filter.rs │ ├── filter_pushdown.rs │ ├── joins.rs │ ├── macros.rs │ ├── physical.rs │ ├── project_transpose.rs │ ├── project_transpose │ │ ├── project_filter_transpose.rs │ │ ├── project_join_transpose.rs │ │ ├── project_merge.rs │ │ └── project_transpose_common.rs │ ├── subquery.rs │ └── subquery │ │ └── depjoin_pushdown.rs │ ├── testing.rs │ ├── testing │ ├── dummy_cost.rs │ └── tpch_catalog.rs │ └── utils.rs ├── optd-gungnir ├── Cargo.toml └── src │ ├── lib.rs │ ├── stats.rs │ ├── stats │ ├── counter.rs │ ├── hyperloglog.rs │ ├── 
misragries.rs │ ├── murmur2.rs │ └── tdigest.rs │ ├── utils.rs │ └── utils │ └── arith_encoder.rs ├── optd-perfbench ├── Cargo.toml ├── src │ ├── benchmark.rs │ ├── cardbench.rs │ ├── datafusion_dbms.rs │ ├── job.rs │ ├── lib.rs │ ├── main.rs │ ├── postgres_dbms.rs │ ├── shell.rs │ ├── tpch.rs │ └── truecard.rs └── tests │ └── cardtest_integration.rs ├── optd-sqllogictest ├── Cargo.toml ├── slt │ ├── _basic_tables.slt.part │ ├── _tpch_tables.slt.part │ ├── basic.slt │ ├── tpch-q1.slt │ ├── tpch-q10.slt │ ├── tpch-q11.slt │ ├── tpch-q12.slt │ ├── tpch-q13.slt │ ├── tpch-q14.slt │ ├── tpch-q15.slt │ ├── tpch-q16.slt │ ├── tpch-q17.slt │ ├── tpch-q18.slt.disabled │ ├── tpch-q19.slt │ ├── tpch-q2.slt.disabled │ ├── tpch-q20.slt │ ├── tpch-q21.slt.disabled │ ├── tpch-q22.slt │ ├── tpch-q3.slt │ ├── tpch-q4.slt │ ├── tpch-q5.slt │ ├── tpch-q6.slt │ ├── tpch-q7.slt │ ├── tpch-q8.slt │ ├── tpch-q9.slt │ ├── unnest-agg-nulls.slt │ ├── unnest-count-star.slt │ ├── unnest-dup.slt │ ├── unnest-exists-2.slt │ ├── unnest-exists-uncor.slt │ ├── unnest-exists.slt │ ├── unnest-extern-out-of-order.slt │ ├── unnest-in-exists.slt │ ├── unnest-in-uncor.slt │ ├── unnest-in.slt │ └── unnest-not-in-uncor.slt ├── src │ └── lib.rs └── tests │ └── harness.rs ├── optd-sqlplannertest ├── Cargo.toml ├── README.md ├── benches │ └── planner_bench.rs ├── src │ ├── bench_helper.rs │ ├── bench_helper │ │ ├── execution.rs │ │ └── planning.rs │ ├── bin │ │ └── planner_test_apply.rs │ └── lib.rs └── tests │ ├── basic │ ├── basic_nodes.planner.sql │ ├── basic_nodes.yml │ ├── constant_predicate.planner.sql │ ├── constant_predicate.yml │ ├── cross_product.planner.sql │ ├── cross_product.yml │ ├── eliminate_duplicated_expr.planner.sql │ ├── eliminate_duplicated_expr.yml │ ├── eliminate_limit.planner.sql │ ├── eliminate_limit.yml │ ├── eliminate_proj.planner.sql │ ├── eliminate_proj.yml │ ├── empty_relation.planner.sql │ ├── empty_relation.yml │ ├── filter.planner.sql │ ├── filter.yml │ ├── 
verbose.planner.sql │ └── verbose.yml │ ├── expressions │ ├── redundant_exprs.planner.sql │ └── redundant_exprs.yml │ ├── joins │ ├── join_enumerate.planner.sql │ ├── join_enumerate.yml │ ├── multi-join.planner.sql │ ├── multi-join.yml │ ├── self-join.planner.sql │ └── self-join.yml │ ├── planner_test.rs │ ├── pushdowns │ ├── fliter_transpose.planner.sql │ └── fliter_transpose.yml │ ├── subqueries │ ├── subquery_unnesting.planner.sql │ └── subquery_unnesting.yml │ ├── tpch │ ├── bench_populate.sql │ ├── q1.planner.sql │ ├── q1.yml │ ├── q10.planner.sql │ ├── q10.yml │ ├── q11.planner.sql │ ├── q11.yml │ ├── q12.planner.sql │ ├── q12.yml │ ├── q13.planner.sql │ ├── q13.yml │ ├── q14.planner.sql │ ├── q14.yml │ ├── q15.planner.sql │ ├── q15.yml │ ├── q16.planner.sql │ ├── q16.yml │ ├── q17.planner.sql │ ├── q17.yml │ ├── q18.yml.disabled │ ├── q19.planner.sql │ ├── q19.yml │ ├── q2.planner.sql │ ├── q2.yml │ ├── q20.planner.sql │ ├── q20.yml │ ├── q21.yml.disabled │ ├── q22.planner.sql │ ├── q22.yml │ ├── q3.planner.sql │ ├── q3.yml │ ├── q4.planner.sql │ ├── q4.yml │ ├── q5.planner.sql │ ├── q5.yml │ ├── q6.planner.sql │ ├── q6.yml │ ├── q7.planner.sql │ ├── q7.yml │ ├── q8.planner.sql │ ├── q8.yml │ ├── q9.planner.sql │ ├── q9.yml │ └── schema.sql │ └── utils │ ├── memo_dump.planner.sql │ └── memo_dump.yml ├── rust-toolchain └── tpch_diff.sh /.github/workflows/CI.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | env: 12 | CARGO_TERM_COLOR: always 13 | 14 | jobs: 15 | check: 16 | runs-on: ubuntu-latest 17 | 18 | services: 19 | postgres: 20 | image: postgres:15 21 | env: 22 | POSTGRES_USER: test_user 23 | POSTGRES_PASSWORD: password 24 | POSTGRES_DB: postgres 25 | ports: 26 | - 5432:5432 27 | options: >- 28 | --health-cmd pg_isready 29 | --health-interval 10s 30 | --health-timeout 5s 31 | --health-retries 5 32 | 33 | 
steps: 34 | - uses: actions/checkout@v2 35 | - uses: actions-rs/toolchain@v1 36 | with: 37 | profile: minimal 38 | components: rustfmt, clippy 39 | - name: Check code format 40 | uses: actions-rs/cargo@v1 41 | with: 42 | command: fmt 43 | args: --all -- --check 44 | - name: Clippy 45 | uses: actions-rs/cargo@v1 46 | with: 47 | command: clippy 48 | args: --workspace --all-targets --all-features --locked -- -D warnings 49 | - uses: taiki-e/install-action@nextest 50 | - name: Test 51 | uses: actions-rs/cargo@v1 52 | with: 53 | command: nextest 54 | args: run --no-fail-fast --workspace --all-features --locked 55 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: mdbook gh pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | 9 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 16 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
17 | concurrency: 18 | group: "pages" 19 | cancel-in-progress: false 20 | 21 | jobs: 22 | build: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: setup mdbook 27 | uses: peaceiris/actions-mdbook@v1 28 | with: 29 | mdbook-version: 'latest' 30 | - run: cd docs && mdbook build 31 | - name: upload dist 32 | uses: actions/upload-pages-artifact@v3 33 | with: 34 | path: docs/book/ 35 | deploy: 36 | runs-on: ubuntu-latest 37 | # Add a dependency to the build job 38 | needs: build 39 | # Deploy to the github-pages environment 40 | environment: 41 | name: github-pages 42 | url: ${{ steps.deployment.outputs.page_url }} 43 | steps: 44 | - name: deploy to gh pages 45 | id: deployment 46 | uses: actions/deploy-pages@v4 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.vscode 3 | /.DS_Store 4 | /.idea 5 | .history 6 | **/*_workspace/ -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "datafusion-optd-cli", 4 | "optd-core", 5 | "optd-datafusion-bridge", 6 | "optd-datafusion-repr", 7 | "optd-sqlplannertest", 8 | "optd-adaptive-demo", 9 | "optd-gungnir", 10 | "optd-perfbench", 11 | "optd-datafusion-repr-adv-cost", 12 | "optd-sqllogictest", 13 | ] 14 | resolver = "2" 15 | 16 | [workspace.package] 17 | version = "0.1.1" 18 | edition = "2021" 19 | homepage = "https://github.com/cmu-db/optd" 20 | keywords = ["sql", "database", "optimizer", "datafusion"] 21 | license = "MIT" 22 | repository = "https://github.com/cmu-db/optd" 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2024 CMU 
Database Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # runs the stuff in CI.yaml locally 3 | # unfortunately this needs to be updated manually. 
just update it if you get annoyed by GHAs failing 4 | 5 | set -e 6 | 7 | cargo fmt --all -- --check 8 | cargo clippy --workspace --all-targets --all-features --locked -- -D warnings 9 | cargo test --no-fail-fast --workspace --all-features --locked 10 | 11 | # %s is a workaround because printing --- doesn"t work in some shells 12 | # this just makes it more obvious when the CI has passed 13 | printf "%s\n| \033[32m\033[1mCI PASSED\033[0m |\n%s\n" "-------------" "-------------" -------------------------------------------------------------------------------- /datafusion-optd-cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "datafusion-optd-cli" 20 | description = "Command Line Client for DataFusion query engine." 
21 | version = "43.0.0" 22 | authors = ["Apache DataFusion "] 23 | edition = "2021" 24 | keywords = ["arrow", "datafusion", "query", "sql"] 25 | license = "Apache-2.0" 26 | homepage = "https://github.com/cmu-db/optd" 27 | repository = "https://github.com/cmu-db/optd" 28 | # Specify MSRV here as `cargo msrv` doesn't support workspace version 29 | rust-version = "1.79" 30 | readme = "README.md" 31 | 32 | [dependencies] 33 | arrow = { version = "53.0.0" } 34 | async-trait = "0.1.73" 35 | aws-config = "1.5.5" 36 | aws-sdk-sso = "1.43.0" 37 | aws-sdk-ssooidc = "1.44.0" 38 | aws-sdk-sts = "1.43.0" 39 | # end pin aws-sdk crates 40 | aws-credential-types = "1.2.0" 41 | clap = { version = "4.5.16", features = ["derive", "cargo"] } 42 | datafusion = { version = "43.0.0", features = [ 43 | "avro", 44 | "crypto_expressions", 45 | "datetime_expressions", 46 | "encoding_expressions", 47 | "parquet", 48 | "regex_expressions", 49 | "unicode_expressions", 50 | "compression", 51 | ] } 52 | dirs = "5.0.1" 53 | env_logger = "0.11" 54 | futures = "0.3" 55 | mimalloc = { version = "0.1", default-features = false } 56 | object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] } 57 | parking_lot = { version = "0.12" } 58 | parquet = { version = "53.0.0", default-features = false } 59 | regex = "1.8" 60 | rustyline = "14.0" 61 | tokio = { version = "1.24", features = [ 62 | "macros", 63 | "rt", 64 | "rt-multi-thread", 65 | "sync", 66 | "parking_lot", 67 | "signal", 68 | ] } 69 | url = "2.2" 70 | # begin optd-cli patch 71 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } 72 | # end optd-cli patch 73 | 74 | [dev-dependencies] 75 | assert_cmd = "2.0" 76 | ctor = "0.2.0" 77 | predicates = "3.0" 78 | rstest = "0.22" 79 | -------------------------------------------------------------------------------- /datafusion-optd-cli/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache 
Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | FROM rust:1.79-bookworm AS builder 19 | 20 | COPY . /usr/src/datafusion 21 | COPY ./datafusion /usr/src/datafusion/datafusion 22 | COPY ./datafusion-cli /usr/src/datafusion/datafusion-cli 23 | 24 | WORKDIR /usr/src/datafusion/datafusion-cli 25 | 26 | RUN rustup component add rustfmt 27 | 28 | RUN cargo build --release 29 | 30 | FROM debian:bookworm-slim 31 | 32 | COPY --from=builder /usr/src/datafusion/datafusion-cli/target/release/datafusion-cli /usr/local/bin 33 | 34 | RUN mkdir /data 35 | 36 | ENTRYPOINT ["datafusion-cli"] 37 | 38 | CMD ["--data-path", "/data"] 39 | -------------------------------------------------------------------------------- /datafusion-optd-cli/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | 21 | 22 | # DataFusion Command-line Interface 23 | 24 | [DataFusion](https://datafusion.apache.org/) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. 25 | 26 | DataFusion CLI (`datafusion-cli`) is a small command line utility that runs SQL queries using the DataFusion engine. 
27 | 28 | # Frequently Asked Questions 29 | 30 | ## Where can I find more information? 31 | 32 | See the [`datafusion-cli` documentation](https://datafusion.apache.org/user-guide/cli/index.html) for further information. 33 | 34 | ## How do I make my IDE work with `datafusion-cli`? 35 | 36 | "open" the `datafusion/datafusion-cli` project as its own top level 37 | project in my IDE (rather than opening `datafusion`) 38 | 39 | The reason `datafusion-cli` is not part of the main workspace in 40 | [`datafusion Cargo.toml`] file is that `datafusion-cli` is a binary and has a 41 | checked in `Cargo.lock` file to ensure reproducible builds. 42 | 43 | However, the `datafusion` and sub crates are intended for use as libraries and 44 | thus do not have a `Cargo.lock` file checked in. 45 | 46 | [`datafusion cargo.toml`]: https://github.com/apache/datafusion/blob/main/Cargo.toml 47 | -------------------------------------------------------------------------------- /datafusion-optd-cli/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | #![doc = include_str!("../README.md")] 19 | pub const DATAFUSION_CLI_VERSION: &str = env!("CARGO_PKG_VERSION"); 20 | 21 | pub mod catalog; 22 | pub mod cli_context; 23 | pub mod command; 24 | pub mod exec; 25 | pub mod functions; 26 | pub mod helper; 27 | pub mod highlighter; 28 | pub mod object_storage; 29 | pub mod pool_type; 30 | pub mod print_format; 31 | pub mod print_options; 32 | -------------------------------------------------------------------------------- /datafusion-optd-cli/src/pool_type.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::{ 19 | fmt::{self, Display, Formatter}, 20 | str::FromStr, 21 | }; 22 | 23 | #[derive(PartialEq, Debug, Clone)] 24 | pub enum PoolType { 25 | Greedy, 26 | Fair, 27 | } 28 | 29 | impl FromStr for PoolType { 30 | type Err = String; 31 | 32 | fn from_str(s: &str) -> Result { 33 | match s { 34 | "Greedy" | "greedy" => Ok(PoolType::Greedy), 35 | "Fair" | "fair" => Ok(PoolType::Fair), 36 | _ => Err(format!("Invalid memory pool type '{}'", s)), 37 | } 38 | } 39 | } 40 | 41 | impl Display for PoolType { 42 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 43 | match self { 44 | PoolType::Greedy => write!(f, "greedy"), 45 | PoolType::Fair => write!(f, "fair"), 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /datafusion-optd-cli/tests/cli_integration.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::process::{Command, Stdio}; 19 | 20 | use assert_cmd::prelude::CommandCargoExt; 21 | 22 | #[cfg(test)] 23 | #[ctor::ctor] 24 | fn init() { 25 | // Enable RUST_LOG logging configuration for tests 26 | let _ = env_logger::try_init(); 27 | } 28 | 29 | // TODO: fix these later. They're commented out since they were broken when we first received the codebase. 30 | // #[rstest] 31 | // #[case::exec_from_commands( 32 | // ["--command", "select 1", "--format", "json", "-q"], 33 | // "[{\"Int64(1)\":1}]\n" 34 | // )] 35 | // #[case::exec_multiple_statements( 36 | // ["--command", "select 1; select 2;", "--format", "json", "-q"], 37 | // "[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" 38 | // )] 39 | // #[case::exec_from_files( 40 | // ["--file", "tests/data/sql.txt", "--format", "json", "-q"], 41 | // "[{\"Int64(1)\":1}]\n" 42 | // )] 43 | // #[case::set_batch_size( 44 | // ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], 45 | // "[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" 46 | // )] 47 | // #[test] 48 | // fn cli_quick_test<'a>(#[case] args: impl IntoIterator, #[case] expected: &str) { 49 | // let mut cmd = Command::cargo_bin("datafusion-optd-cli").unwrap(); 50 | // cmd.args(args); 51 | // cmd.assert().stdout(predicate::eq(expected)); 52 | // } 53 | 54 | #[test] 55 | fn cli_test_tpch() { 56 | let mut cmd = Command::cargo_bin("datafusion-optd-cli").unwrap(); 57 | cmd.current_dir(".."); // all paths in `test.sql` assume we're in the base dir of the repo 58 | cmd.args(["--file", "datafusion-optd-cli/tpch-sf0_01/test.sql"]); 59 | cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); 60 | let status = cmd.status().unwrap(); 61 | assert!( 62 | status.success(), 63 | "should not have crashed when running tpch" 64 | ); 65 | } 66 | -------------------------------------------------------------------------------- /datafusion-optd-cli/tests/data/sql.txt: 
-------------------------------------------------------------------------------- 1 | select 1; -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/README.md: -------------------------------------------------------------------------------- 1 | ## Generate Data 2 | 3 | ``` 4 | git clone https://github.com/electrum/tpch-dbgen.git 5 | cd tpch-dbgen 6 | make 7 | ./dbgen -s 0.001 8 | mv *.tbl 9 | ``` 10 | 11 | This directory is copy-pasted from the RisingLight project. 12 | -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/nation.csv: -------------------------------------------------------------------------------- 1 | 0|ALGERIA|0| haggle. carefully final deposits detect slyly agai 2 | 1|ARGENTINA|1|al foxes promise slyly according to the regular accounts. bold requests alon 3 | 2|BRAZIL|1|y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special 4 | 3|CANADA|1|eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold 5 | 4|EGYPT|4|y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d 6 | 5|ETHIOPIA|0|ven packages wake quickly. regu 7 | 6|FRANCE|3|refully final requests. regular, ironi 8 | 7|GERMANY|3|l platelets. regular accounts x-ray: unusual, regular acco 9 | 8|INDIA|2|ss excuses cajole slyly across the packages. deposits print aroun 10 | 9|INDONESIA|2| slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull 11 | 10|IRAN|4|efully alongside of the slyly final dependencies. 12 | 11|IRAQ|4|nic deposits boost atop the quickly final requests? quickly regula 13 | 12|JAPAN|2|ously. final, express gifts cajole a 14 | 13|JORDAN|4|ic deposits are blithely about the carefully regular pa 15 | 14|KENYA|0| pending excuses haggle furiously deposits. 
pending, express pinto beans wake fluffily past t 16 | 15|MOROCCO|0|rns. blithely bold courts among the closely regular packages use furiously bold platelets? 17 | 16|MOZAMBIQUE|0|s. ironic, unusual asymptotes wake blithely r 18 | 17|PERU|1|platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun 19 | 18|CHINA|2|c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos 20 | 19|ROMANIA|3|ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account 21 | 20|SAUDI ARABIA|4|ts. silent requests haggle. closely express packages sleep across the blithely 22 | 21|VIETNAM|2|hely enticingly express accounts. even, final 23 | 22|RUSSIA|3| requests against the platelets use never according to the quickly regular pint 24 | 23|UNITED KINGDOM|3|eans boost carefully special requests. accounts are. carefull 25 | 24|UNITED STATES|1|y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/region.csv: -------------------------------------------------------------------------------- 1 | 0|AFRICA|lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to 2 | 1|AMERICA|hs use ironic, even requests. s 3 | 2|ASIA|ges. thinly even pinto beans ca 4 | 3|EUROPE|ly final courts cajole furiously final excuse 5 | 4|MIDDLE EAST|uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/simple_manual_test.sql: -------------------------------------------------------------------------------- 1 | -- This is just used if you want to run really simple manual tests on the CLI. 
Feel free to delete the whole thing and write your own manual tests 2 | -- Command: `cargo run --bin datafusion-optd-cli -- --enable-df-logical -f datafusion-optd-cli/tpch-sf0_01/simple_manual_test.sql` 3 | CREATE TABLE NATION ( 4 | N_NATIONKEY INT NOT NULL, 5 | N_NAME CHAR(25) NOT NULL, 6 | N_REGIONKEY INT NOT NULL, 7 | N_COMMENT VARCHAR(152) 8 | ); 9 | 10 | CREATE EXTERNAL TABLE nation_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION 'datafusion-optd-cli/tpch-sf0_01/nation.csv'; 11 | insert into nation select column_1, column_2, column_3, column_4 from nation_tbl; 12 | 13 | SELECT * FROM nation where nation.n_nationkey = 1 OR nation.n_nationkey = 2 OR nation.n_nationkey = 5; 14 | -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/supplier.csv: -------------------------------------------------------------------------------- 1 | 1|Supplier#000000001| N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ|17|27-918-335-1736|5755.94|each slyly above the careful 2 | 2|Supplier#000000002|89eJ5ksX3ImxJQBvxObC,|5|15-679-861-2259|4032.68| slyly bold instructions. idle dependen 3 | 3|Supplier#000000003|q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3|1|11-383-516-1199|4192.40|blithely silent requests after the express dependencies are sl 4 | 4|Supplier#000000004|Bk7ah4CK8SYQTepEmvMkkgMwg|15|25-843-787-7479|4641.08|riously even requests above the exp 5 | 5|Supplier#000000005|Gcdm2rJRzl5qlTVzc|11|21-151-690-3663|-283.84|. slyly regular pinto bea 6 | 6|Supplier#000000006|tQxuVm7s7CnK|14|24-696-997-4969|1365.79|final accounts. regular dolphins use against the furiously ironic decoys. 7 | 7|Supplier#000000007|s,4TicNGB4uO6PaSqNBUq|23|33-990-965-2201|6820.35|s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit 8 | 8|Supplier#000000008|9Sq4bBH2FQEmaFOocY45sRTxo6yuoG|17|27-498-742-3860|7627.85|al pinto beans. 
asymptotes haggl 9 | 9|Supplier#000000009|1KhUgZegwM3ua7dsYmekYBsK|10|20-403-398-8662|5302.37|s. unusual, even requests along the furiously regular pac 10 | 10|Supplier#000000010|Saygah3gYWMp72i PY|24|34-852-489-8585|3891.91|ing waters. regular requests ar -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/tbl_to_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | def tbl_to_csv(file): 5 | lines = [] 6 | for line in Path(file).read_text().splitlines(): 7 | # Replace the delimiter `|` with `,` 8 | line = line.strip('|') 9 | lines.append(line) 10 | # Write the converted content to a new `.csv` file 11 | Path(file.replace('.tbl', '.csv')).write_text('\n'.join(lines)) 12 | 13 | def main(): 14 | # Find all files end with `.tbl` in the current directory 15 | # and convert them to `.csv` files. 16 | for file in os.listdir('.'): 17 | if file.endswith('.tbl'): 18 | tbl_to_csv(file) 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /dev_scripts/which_queries_work.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | benchmark_name=$1 3 | USAGE="Usage: $0 [job|joblight|tpch]" 4 | 5 | if [ $# -ne 1 ]; then 6 | echo >&2 $USAGE 7 | exit 1 8 | fi 9 | 10 | if [[ "$benchmark_name" == "job" ]]; then 11 | all_ids="1a,1b,1c,1d,2a,2b,2c,2d,3a,3b,3c,4a,4b,4c,5a,5b,5c,6a,6b,6c,6d,6e,6f,7a,7b,7c,8a,8b,8c,8d,9a,9b,9c,9d,10a,10b,10c,11a,11b,11c,11d,12a,12b,12c,13a,13b,13c,13d,14a,14b,14c,15a,15b,15c,15d,16a,16b,16c,16d,17a,17b,17c,17d,17e,17f,18a,18b,18c,19a,19b,19c,19d,20a,20b,20c,21a,21b,21c,22a,22b,22c,22d,23a,23b,23c,24a,24b,25a,25b,25c,26a,26b,26c,27a,27b,27c,28a,28b,28c,29a,29b,29c,30a,30b,30c,31a,31b,31c,32a,32b,33a,33b,33c" 12 | vec_var_name="WORKING_JOB_QUERY_IDS" 13 | elif [[ "$benchmark_name" 
== "joblight" ]]; then 14 | all_ids="1a,1b,1c,1d,2a,3a,3b,3c,4a,4b,4c,5a,5b,5c,6a,6b,6c,6d,6e,7a,7b,7c,8a,8b,8c,9a,9b,10a,10b,10c,11a,11b,11c,12a,12b,12c,13a,14a,14b,14c,15a,15b,15c,16a,17a,17b,17c,18a,18b,18c,19a,19b,20a,20b,20c,21a,21b,22a,22b,22c,23a,23b,24a,24b,25a,26a,26b,27a,27b,28a" 15 | vec_var_name="WORKING_JOBLIGHT_QUERY_IDS" 16 | elif [[ "$benchmark_name" == "tpch" ]]; then 17 | all_ids="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" 18 | vec_var_name="WORKING_QUERY_IDS" 19 | else 20 | echo >&2 $USAGE 21 | exit 1 22 | fi 23 | 24 | successful_ids=() 25 | IFS=',' 26 | for id in $all_ids; do 27 | cargo run --release --bin optd-perfbench cardbench $benchmark_name --query-ids $id &>/dev/null 28 | 29 | if [ $? -eq 0 ]; then 30 | echo >&2 $id succeeded 31 | successful_ids+=("$id") 32 | else 33 | echo >&2 $id failed 34 | fi 35 | done 36 | 37 | echo >&2 38 | echo " Useful Outputs" 39 | echo "================" 40 | working_query_ids_vec="pub const ${vec_var_name}: &[&str] = &[\"${successful_ids[0]}\"" 41 | IFS=" " 42 | for id in "${successful_ids[@]:1}"; do 43 | working_query_ids_vec+=", \"$id\"" 44 | done 45 | working_query_ids_vec+="]" 46 | echo "${working_query_ids_vec}" 47 | IFS="," 48 | echo "--query-ids ${successful_ids[*]}" 49 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | book 2 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # optd Development Documentation 2 | 3 | The docs is written in `mdbook` format. You can follow the [`mdbook` installation guide](https://rust-lang.github.io/mdBook/guide/installation.html) to set up the environment. 
After installing mdbook, you can use the following command to start the docs server: 4 | 5 | ```shell 6 | mdbook serve 7 | ``` 8 | 9 | The online version of the documentation can be found at [https://cmu-db.github.io/optd/](https://cmu-db.github.io/optd/). 10 | -------------------------------------------------------------------------------- /docs/book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | authors = ["Alex Chi"] 3 | language = "en" 4 | multilingual = false 5 | src = "src" 6 | title = "the optd book" 7 | 8 | [output.html] 9 | additional-css = ["custom.css"] 10 | -------------------------------------------------------------------------------- /docs/custom.css: -------------------------------------------------------------------------------- 1 | .content img { 2 | margin-left: auto; 3 | margin-right: auto; 4 | display: block; 5 | } 6 | -------------------------------------------------------------------------------- /docs/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | # optd book 4 | 5 | [Intro to optd]() 6 | - [The Core]() 7 | - [Plan Representation]() 8 | - [Memo Table and Logical Equivalence]() 9 | - [Cascades Framework]() 10 | - [Basic Cascades Tasks]() 11 | - [Cycle Avoidance]() 12 | - [Upper Bound Pruning]() 13 | - [Multi-Stage Optimization]() 14 | - [Rule IR and Matcher]() 15 | - [Cost and Statistics]() 16 | - [Logical Properties]() 17 | - [Physical Properties and Enforcers]() 18 | - [Memo Table: Subgoals and Winners]() 19 | - [Cascades Tasks: Required Physical Properties]() 20 | - [Exploration Budget]() 21 | - [Heuristics Optimizer]() 22 | - [Integration with Datafusion]() 23 | - [Datafusion Plan Representation]() 24 | - [Datafusion Bridge]() 25 | - [Rule Engine and Rules]() 26 | - [Basic Cost Model]() 27 | - [Logical and Physical Properties]() 28 | - [Optimization Passes]() 29 | - [Miscellaneous]() 30 | - [Explain]() 31 | - 
[Research]() 32 | - [Partial Exploration and Re-Optimization]() 33 | - [Advanced Cost Model]() 34 | - [The Hyper Subquery Unnesting Ruleset]() 35 | - [Testing and Benchmark]() 36 | - [sqlplannertest]() 37 | - [sqllogictest]() 38 | - [perfbench]() 39 | - [Debugging and Tracing]() 40 | - [optd-core Tracing]() 41 | - [Memo Table Visualization]() 42 | - [Optimizer Dump]() 43 | - [Contribution Guide]() 44 | - [Install Tools]() 45 | - [Contribution Workflow]() 46 | - [Add a Datafusion Rule]() 47 | - [What's Next]() 48 | - [Ideas]() 49 | - [RFCs]() 50 | --- 51 | 52 | # DEPRECATED 53 | - [old optd book]() 54 | - [Core Framework]() 55 | - [Optimizer](./optimizer.md) 56 | - [Plan Representation](./plan_repr.md) 57 | - [Rule Engine](./rule_engine.md) 58 | - [Cost Model](./cost_model.md) 59 | - [Properties](./properties.md) 60 | - [Integration]() 61 | - [Apache Arrow Datafusion](./datafusion.md) 62 | - [Adaptive Optimization]() 63 | - [Re-optimization](./reoptimization.md) 64 | - [Partial Exploration](./partial_exploration.md) 65 | - [Demo]() 66 | - [Three Join Demo](./demo_three_join.md) 67 | - [TPC-H Q8 Demo](./demo_tpch_q8.md) 68 | - [Performance Benchmarking]() 69 | - [Cost Model Cardinality Benchmarking](./cost_model_benchmarking.md) 70 | - [Functional Testing]() 71 | - [SQLPlannerTest](./sqlplannertest.md) 72 | - [Datafusion CLI](./datafusion_cli.md) 73 | - [Miscellaneous](./miscellaneous.md) 74 | -------------------------------------------------------------------------------- /docs/src/cost_model.md: -------------------------------------------------------------------------------- 1 | # Cost Model 2 | 3 | Developers can plug their own cost models into optd. The cost must be represented as a vector of `f64`s, where the first element in the vector is the weighted cost. The optimizer will use weighted cost internally for cost comparison and select the winner for a group. 
4 | 5 | The cost model interface can be found in `optd-core/src/cost.rs`, and the core of the cost model is the cost computation process implemented in `CostModel::compute_cost`. 6 | 7 | ```rust 8 | pub trait CostModel: 'static + Send + Sync { 9 | fn compute_cost( 10 | &self, 11 | node: &T, 12 | data: &Option, 13 | children: &[Cost], 14 | context: RelNodeContext, 15 | ) -> Cost; 16 | } 17 | ``` 18 | 19 | `compute_cost` takes the cost of the children, the current plan node information, and some contexts of the current node. The context will be useful for adaptive optimization, and it contains the group ID and the expression ID of the current plan node, so that the adaptive cost model can use runtime information from the last run to compute the cost. 20 | 21 | The optd Datafusion cost model stores 4 elements in the cost vector: weighted cost, row count, compute cost and I/O cost. The cost of the plan nodes and the SQL expressions can all be computed solely based on these information. 22 | 23 | Contrary to other optimizer frameworks like Calcite, optd does not choose to implement the cost model as part of the plan node member functions. In optd, developers write all cost computation things in one file, so that testing and debugging the cost model all happens in one file (or in one `impl`). 24 | -------------------------------------------------------------------------------- /docs/src/datafusion_cli.md: -------------------------------------------------------------------------------- 1 | # Datafusion CLI 2 | 3 | Developers can interact with optd by using the Datafusion cli. The cli supports creating tables, populating data, and executing ANSI SQL queries. 4 | 5 | ```shell 6 | cargo run --bin datafusion-optd-cli 7 | ``` 8 | 9 | We also have a scale 0.01 TPC-H dataset to test. The test SQL can be executed with the Datafusion cli. 
10 | 11 | ```shell 12 | cargo run --bin datafusion-optd-cli -- -f datafusion-optd-cli/tpch-sf0_01/test.sql 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/src/demo_three_join.md: -------------------------------------------------------------------------------- 1 | # Three Join Demo 2 | 3 | You can run this demo with the following command: 4 | 5 | ```shell 6 | cargo run --release --bin optd-adaptive-three-join 7 | ``` 8 | 9 | We create 3 tables and join them. The underlying data are getting updated every time the query is executed. 10 | 11 | ```sql 12 | select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2; 13 | ``` 14 | 15 | When the data distribution and the table size changes, the optimal join order will be different. The output of this demo is as below. 16 | 17 | ```plain 18 | Iter 66: (HashJoin (HashJoin t1 t2) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 66/66=100.000 19 | Iter 67: (HashJoin (HashJoin t2 t1) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 66/67=98.507 20 | Iter 68: (HashJoin t2 (HashJoin t1 t3)) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 66/68=97.059 21 | Iter 69: (HashJoin (HashJoin t1 t2) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 67/69=97.101 22 | Iter 70: (HashJoin (HashJoin t1 t2) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 68/70=97.143 23 | Iter 71: (HashJoin (HashJoin t1 t2) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 69/71=97.183 24 | Iter 72: (HashJoin (HashJoin t2 t1) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 69/72=95.833 25 | ``` 26 | 27 | The left plan Lisp representation is the join order determined by the adaptive query optimization algorithm. The right plan is the best plan. The accuracy is the percentage of executions that the adaptive query optimization algorithm generates the best cost-optimal plan. 
28 | 29 | To find the optimal plan and compute the accuracy, we set up two optimizers in this demo: the normal optimizer and the optimal optimizer. Each time we insert some data into the tables, we will invoke the normal optimizer once, and invoke the optimal optimizer with all possible combination of join orders, so that the optimal optimizer can produce an optimal plan based on the cost model and the join selectivity. 30 | 31 | As the algorithm can only know the runtime information from last run before new data are added into the tables, there will be some iterations where it cannot generate the optimal plan. But it will converge to the optimal plan as more runtime information is collected. 32 | -------------------------------------------------------------------------------- /docs/src/demo_tpch_q8.md: -------------------------------------------------------------------------------- 1 | # TPC-H Q8 Demo 2 | 3 | 4 | You can run this demo with the following command: 5 | 6 | ```shell 7 | cargo run --release --bin optd-adaptive-tpch-q8 8 | ``` 9 | 10 | In this demo, we create the TPC-H schema with test data of scale 0.01. There are 8 tables in TPC-H Q8, and it is impossible to enumerate all join combinations in one run. The demo will run this query multiple times, each time exploring a subset of the plan space. Therefore, optimization will be fast for each iteration, and as the plan space is more explored in each iteration, the produced plan will converge to the optimal join order. 11 | 12 | ```plain 13 | --- ITERATION 5 --- 14 | plan space size budget used, not applying logical rules any more. current plan space: 10354 15 | (HashJoin region (HashJoin (HashJoin (HashJoin (HashJoin (HashJoin part (HashJoin supplier lineitem)) orders) customer) nation) nation)) 16 | plan space size budget used, not applying logical rules any more. 
current plan space: 11743 17 | +--------+------------+ 18 | | col0 | col1 | 19 | +--------+------------+ 20 | | 1995.0 | 1.00000000 | 21 | | 1996.0 | 0.32989690 | 22 | +--------+------------+ 23 | 2 rows in set. Query took 0.115 seconds. 24 | ``` 25 | 26 | The output contains the current join order in Lisp representation, the plan space, and the query result. 27 | -------------------------------------------------------------------------------- /docs/src/miscellaneous.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous 2 | 3 | This is a note covering things that do not work well in the system right now. 4 | 5 | ## Type System 6 | 7 | Currently, we hard code decimal type to have `15, 2` precision. Type inferences should be done in the schema property inference. 8 | 9 | ## Expression 10 | 11 | optd supports exploring SQL expressions in the optimization process. However, this might be super inefficient as optimizing a plan node (i.e., join to hash join) usually needs the full binding of an expression tree. This could have exponential plan space and is super inefficient. 12 | 13 | ## Bindings 14 | 15 | We do not have something like a binding iterator as in the Cascades paper. Before applying a rule, we will generate all bindings of a group, which might take a lot of memory. This should be fixed in the future. 16 | 17 | ## Cycle Detection + DAG 18 | 19 | Consider the case for join commute rule. 20 | 21 | ``` 22 | (Join A B) <- group 1 23 | (Projection (Join B A) ) <- group 2 24 | (Projection (Projection (Join A B) ) ) <- group 1 may refer itself 25 | ``` 26 | 27 | After applying the rule twice, the memo table will have self-referential groups. Currently, we detect such self-referential things in optimize group task. Probably there will be better ways to do that. 28 | 29 | The same applies to DAG / Recursive CTEs -- we did not test if the framework works with DAG but in theory it should support it. 
We just need to ensure a node in DAG does not get searched twice. 30 | 31 | # DAG 32 | 33 | For DAG, another challenge is to recover the reusable fragments from the optimizer output. The optimizer can give you a DAG output but by iterating through the plan, you cannot know which parts can be reused/materialized. Therefore, we might need to produce some extra information with the plan node output. i.e., a graph-representation with metadata of each node, instead of `RelNode`. This also helps the process of inserting the physical collector plan nodes, which is currently a little bit hacky in the implementation. 34 | 35 | ## Memo Table 36 | 37 | Obviously, it is not efficient to simply store a mapping from RelNode to the expression id. Cannot imagine how many levels of depths will it require to compute a hash of a tree structure. 38 | 39 | ## Partial Exploration 40 | 41 | Each iteration will only be slower because we have to invoke the optimize group tasks before we can find a group to apply the rule. Probably we can keep the task stack across runs to make it faster. 42 | 43 | ## Physical Property + Enforcer Rules 44 | 45 | A major missing feature in the optimizer. Need this to support shuffling and sort optimizations. 46 | 47 | ## Pruning 48 | 49 | Currently, we have implemented the pruning condition as in the paper, but we did not actually enable it. 50 | -------------------------------------------------------------------------------- /docs/src/partial_exploration.md: -------------------------------------------------------------------------------- 1 | # Partial Exploration 2 | 3 | When the plan space is very large, optd will generate a sub-optimal plan at first, and then use the runtime information to continue the plan space search next time the same query (or a similar query) is being optimized. This is partial exploration. 
4 | 5 | Developers can pass `partial_explore_iter` and `partial_explore_space` to the optimizer options to specify how large the optimizer will expand each time `step_optimize_rel` is invoked. To use partial exploration, developers should not clear the internal state of the optimizer across different runs. 6 | -------------------------------------------------------------------------------- /docs/src/properties.md: -------------------------------------------------------------------------------- 1 | # Properties 2 | 3 | In optd, properties are defined by implementing the `PropertyBuilder` trait in `optd-core/src/property.rs`. Properties will be automatically inferred when plan nodes are added to the memo table. When initializing an optimizer instance, developers will need to provide a vector of properties the optimizer will need to compute throughout the optimization process. 4 | 5 | ## Define a Property 6 | 7 | Currently, optd only supports logical properties. It cannot optimize a query plan with required physical properties for now. An example of property definition is the Datafusion representation's plan node schema, as in `optd-datafusion-repr/src/properties/schema.rs`. 8 | 9 | 10 | ```rust 11 | impl PropertyBuilder for SchemaPropertyBuilder { 12 | type Prop = Schema; 13 | 14 | fn derive( 15 | &self, 16 | typ: OptRelNodeTyp, 17 | data: Option, 18 | children: &[&Self::Prop], 19 | ) -> Self::Prop { 20 | match typ { 21 | OptRelNodeTyp::Scan => { 22 | let name = data.unwrap().as_str().to_string(); 23 | self.catalog.get(&name) 24 | } 25 | // ... 26 | ``` 27 | 28 | The schema property builder implements the `derive` function, which takes the plan node type, plan node data, and the children properties, in order to infer the property of the current plan node. The schema property is stored as a vector of data types in `Schema` structure. In optd, property will be type-erased and stored as `Box` along with each `RelNode` group in the memo table. 
On the developer side, it does not need to handle all the type-erasing things and will work with typed APIs. 29 | 30 | ## Use a Property 31 | 32 | When initializing an optimizer instance, developers will need to provide a vector of property builders to be computed. The property can then be retrieved using the index in the vector and the property builder type. For example, some optimizer rules will need to know the number of columns of a plan node before rewriting an expression. 33 | 34 | For example, the current Datafusion optd optimizer is initialized with: 35 | 36 | ```rust 37 | CascadesOptimizer::new_with_prop( 38 | rules, 39 | Box::new(cost_model), 40 | vec![Box::new(SchemaPropertyBuilder::new(catalog))], 41 | // .. 42 | ), 43 | ``` 44 | 45 | Therefore, developers can use index 0 and `SchemaPropertyBuilder` to retrieve the schema of a plan node after adding the node into the optimizer memo table. 46 | 47 | ```rust 48 | impl PlanNode { 49 | pub fn schema(&self, optimizer: CascadesOptimizer) -> Schema { 50 | let group_id = optimizer.resolve_group_id(self.0.clone()); 51 | optimizer.get_property_by_group::(group_id, 0 /* property ID */) 52 | } 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /docs/src/reoptimization.md: -------------------------------------------------------------------------------- 1 | # Re-optimization 2 | 3 | optd implements re-optimization inspired by [How I Learned to Stop Worrying and Love Re-optimization](https://arxiv.org/abs/1902.08291). optd generates a plan, injects executors to collect runtime data, and uses the runtime information from the previous run to guide the optimization process. 4 | 5 | optd persists optimizer states from run to run. The states include: the memo table, whether a rule is applied on an expression, explored groups, etc. By persisting the states, optd can easily match a query plan or a subset of the query plan with plans that have been executed. 
Once these plan matches are discovered, the adaptive cost model can use the runtime data in the cost computation process to make the cost model more robust and accurate. 6 | 7 | ## Cost Model 8 | 9 | In the optd Datafusion representation, we have 2 cost models: the base cost model and the adaptive cost model. The base cost model estimates the compute and I/O cost solely based on number of rows. The adaptive cost model maintains a hash map that maps plan node group ID to runtime information from the previous N runs, and uses these runtime information to compute a more accurate row count. The adaptive cost model will use the accurate row count information to call into the base cost model that computes a more accurate compute and I/O cost. 10 | 11 | ![re-optimization architecture](./optd-cascades/optd-reopt-architecture.svg) 12 | 13 | ## Execution 14 | 15 | optd will inject collector executors into the query plan. We extended Datafusion to have a new executor called physical collector. The executor will count the number of rows passed from the child executor to the parent executor, and then store the information into the runtime data storage. 16 | 17 | ![re-optimization execution plan](./optd-cascades/optd-reopt-plan.svg) 18 | 19 | ## Optimization Phases 20 | 21 | To enable re-optimization, the user should not clear the internal state of the optimizer. This can be achieved by calling `step_clear_winner` and then `step_optimize_rel`. 22 | -------------------------------------------------------------------------------- /docs/src/sqlplannertest.md: -------------------------------------------------------------------------------- 1 | # SQLPlannerTest 2 | 3 | optd uses risinglightdb's SQL planner test library to ensure the optimizer works correctly and stably produces an expected plan. SQL planner test is a regression test. Developers provide the test framework a yaml file with the queries to be optimized and the information they want to collect. 
The test framework generates the test result and store them in SQL files. When a developer submits a pull request, the reviewers should check if any of these outputs are changed unexpectedly. 4 | 5 | The test cases can be found in `optd-sqlplannertest/tests`. Currently, we check if optd can enumerate all join orders by using the `explain:logical_join_orders,physical_plan` task and check if the query output is as expected by using the `execute` task. 6 | -------------------------------------------------------------------------------- /optd-adaptive-demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-adaptive-demo" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | datafusion-optd-cli = { path = "../datafusion-optd-cli" } 10 | rand = "0.8" 11 | datafusion = { version = "43.0.0", features = [ 12 | "avro", 13 | "crypto_expressions", 14 | "encoding_expressions", 15 | "regex_expressions", 16 | "unicode_expressions", 17 | "compression", 18 | ] } 19 | mimalloc = { version = "0.1", default-features = false } 20 | tokio = { version = "1.24", features = [ 21 | "macros", 22 | "rt", 23 | "rt-multi-thread", 24 | "sync", 25 | "parking_lot", 26 | ] } 27 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge" } 28 | optd-datafusion-repr = { path = "../optd-datafusion-repr" } 29 | console = "0.15" 30 | -------------------------------------------------------------------------------- /optd-core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-core" 3 | description = "core library for optd" 4 | version = { workspace = true } 5 | edition = { workspace = true } 6 | homepage = { workspace = true } 7 | keywords = { workspace = true } 8 | license = { workspace = true } 9 | repository = { workspace = true } 10 | 
11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | anyhow = "1" 15 | tracing = "0.1" 16 | ordered-float = "4" 17 | itertools = "0.13" 18 | serde = { version = "1.0", features = ["derive", "rc"] } 19 | arrow-schema = "53.3.0" 20 | chrono = "0.4" 21 | erased-serde = "0.4" 22 | pollster = "0.4" 23 | stacker = "0.1" 24 | 25 | [dev-dependencies] 26 | pretty_assertions = "1.4.1" 27 | -------------------------------------------------------------------------------- /optd-core/src/cascades.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | //! The core cascades optimizer implementation. 7 | 8 | mod memo; 9 | mod optimizer; 10 | pub mod rule_match; 11 | mod tasks2; 12 | 13 | pub use memo::{Memo, NaiveMemo}; 14 | pub use optimizer::{CascadesOptimizer, ExprId, GroupId, OptimizerProperties, RelNodeContext}; 15 | -------------------------------------------------------------------------------- /optd-core/src/cost.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use crate::cascades::{CascadesOptimizer, Memo, RelNodeContext}; 7 | use crate::nodes::{ArcPredNode, NodeType}; 8 | 9 | /// The statistics of a group. 10 | pub struct Statistics(pub Box); 11 | 12 | /// The cost of an operation. The cost is represented as a vector of double values. 13 | /// For example, it can be represented as `[compute_cost, io_cost]`. 14 | /// A lower value means a better cost. 
15 | #[derive(Default, Clone, Debug, PartialOrd, PartialEq)] 16 | pub struct Cost(pub Vec); 17 | 18 | pub trait CostModel>: 'static + Send + Sync { 19 | /// Compute the cost of a single operation. `RelNodeContext` might be 20 | /// optional in the future when we implement physical property enforcers. 21 | /// If we have not decided the winner for a child group yet, the statistics 22 | /// for that group will be `None`. 23 | #[allow(clippy::too_many_arguments)] 24 | fn compute_operation_cost( 25 | &self, 26 | node: &T, 27 | predicates: &[ArcPredNode], 28 | children_stats: &[Option<&Statistics>], 29 | context: RelNodeContext, 30 | optimizer: &CascadesOptimizer, 31 | ) -> Cost; 32 | 33 | /// Derive the statistics of a single operation. `RelNodeContext` might be 34 | /// optional in the future when we implement physical property enforcers. 35 | fn derive_statistics( 36 | &self, 37 | node: &T, 38 | predicates: &[ArcPredNode], 39 | children_stats: &[&Statistics], 40 | context: RelNodeContext, 41 | optimizer: &CascadesOptimizer, 42 | ) -> Statistics; 43 | 44 | fn explain_cost(&self, cost: &Cost) -> String; 45 | 46 | fn explain_statistics(&self, cost: &Statistics) -> String; 47 | 48 | fn accumulate(&self, total_cost: &mut Cost, cost: &Cost); 49 | 50 | fn sum(&self, operation_cost: &Cost, inputs_cost: &[Cost]) -> Cost { 51 | let mut total_cost = operation_cost.clone(); 52 | for input in inputs_cost { 53 | self.accumulate(&mut total_cost, input); 54 | } 55 | total_cost 56 | } 57 | 58 | /// The zero cost. 59 | fn zero(&self) -> Cost; 60 | 61 | /// The weighted cost of a compound cost. 
62 | fn weighted_cost(&self, cost: &Cost) -> f64; 63 | } 64 | -------------------------------------------------------------------------------- /optd-core/src/heuristics.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | mod optimizer; 7 | 8 | pub use optimizer::{ApplyOrder, HeuristicsOptimizer, HeuristicsOptimizerOptions}; 9 | -------------------------------------------------------------------------------- /optd-core/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | #![allow(clippy::new_without_default)] 7 | 8 | pub mod cascades; 9 | pub mod cost; 10 | pub mod heuristics; 11 | pub mod logical_property; 12 | pub mod nodes; 13 | pub mod optimizer; 14 | pub mod physical_property; 15 | pub mod rules; 16 | 17 | #[cfg(test)] 18 | pub(crate) mod tests; 19 | -------------------------------------------------------------------------------- /optd-core/src/logical_property.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use std::any::Any; 7 | use std::fmt::{Debug, Display}; 8 | 9 | use crate::nodes::{ArcPredNode, NodeType}; 10 | 11 | /// The trait enables we store any logical property in the memo table by erasing the concrete type. 
12 | /// In the future, we can implement `serialize`/`deserialize` on this trait so that we can serialize 13 | /// the logical properties. 14 | pub trait LogicalProperty: 'static + Any + Send + Sync + Debug + Display { 15 | fn as_any(&self) -> &dyn Any; 16 | } 17 | 18 | /// A wrapper around the `LogicalPropertyBuilder` so that we can erase the concrete type and store 19 | /// it safely in the memo table. 20 | pub trait LogicalPropertyBuilderAny: 'static + Send + Sync { 21 | fn derive_any( 22 | &self, 23 | typ: T, 24 | predicates: &[ArcPredNode], 25 | children: &[&dyn LogicalProperty], 26 | ) -> Box; 27 | fn property_name(&self) -> &'static str; 28 | } 29 | 30 | /// The trait for building logical properties for a plan node. 31 | pub trait LogicalPropertyBuilder: 'static + Send + Sync + Sized { 32 | type Prop: LogicalProperty + Sized + Clone; 33 | 34 | /// Derive the output logical property based on the input logical properties and the current plan node information. 35 | fn derive(&self, typ: T, predicates: &[ArcPredNode], children: &[&Self::Prop]) 36 | -> Self::Prop; 37 | 38 | fn property_name(&self) -> &'static str; 39 | } 40 | 41 | impl> LogicalPropertyBuilderAny for P { 42 | fn derive_any( 43 | &self, 44 | typ: T, 45 | predicates: &[ArcPredNode], 46 | children: &[&dyn LogicalProperty], 47 | ) -> Box { 48 | let children: Vec<&P::Prop> = children 49 | .iter() 50 | .map(|child| { 51 | child 52 | .as_any() 53 | .downcast_ref::() 54 | .expect("Failed to downcast child") 55 | }) 56 | .collect(); 57 | Box::new(self.derive(typ, predicates, &children)) 58 | } 59 | 60 | fn property_name(&self) -> &'static str { 61 | LogicalPropertyBuilder::property_name(self) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /optd-core/src/optimizer.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an 
MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use anyhow::Result; 7 | 8 | use crate::logical_property::LogicalPropertyBuilder; 9 | use crate::nodes::{ArcPlanNode, NodeType, PlanNodeOrGroup}; 10 | use crate::physical_property::PhysicalProperty; 11 | 12 | pub trait Optimizer { 13 | fn optimize(&mut self, root_rel: ArcPlanNode) -> Result>; 14 | 15 | fn optimize_with_required_props( 16 | &mut self, 17 | root_rel: ArcPlanNode, 18 | required_props: &[&dyn PhysicalProperty], 19 | ) -> Result>; 20 | 21 | fn get_logical_property>( 22 | &self, 23 | root_rel: PlanNodeOrGroup, 24 | idx: usize, 25 | ) -> P::Prop; 26 | } 27 | -------------------------------------------------------------------------------- /optd-core/src/rules.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | mod ir; 7 | 8 | pub use ir::RuleMatcher; 9 | 10 | use crate::nodes::{ArcPlanNode, NodeType, PlanNodeOrGroup}; 11 | use crate::optimizer::Optimizer; 12 | 13 | // TODO: docs, possible renames. 14 | // TODO: Why do we have all of these match types? Seems like possible overkill. 
15 | pub trait Rule>: 'static + Send + Sync { 16 | fn matcher(&self) -> &RuleMatcher; 17 | fn apply(&self, optimizer: &O, binding: ArcPlanNode) -> Vec>; 18 | fn name(&self) -> &'static str; 19 | fn is_impl_rule(&self) -> bool { 20 | false 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /optd-core/src/rules/ir.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use crate::nodes::NodeType; 7 | 8 | pub enum RuleMatcher { 9 | /// Match a node of type `typ`. 10 | MatchNode { typ: T, children: Vec }, 11 | /// Match "discriminant" (Only check for variant matches---don't consider 12 | /// inner data). 13 | /// This may be useful when, for example, one has an enum variant such as 14 | /// ConstantExpr(ConstantType), and one wants to match on all ConstantExpr 15 | /// regardless of the inner ConstantType. 16 | MatchDiscriminant { 17 | typ_discriminant: std::mem::Discriminant, 18 | children: Vec, 19 | }, 20 | /// Match any plan node. 21 | Any, 22 | /// Match all plan node. 23 | AnyMany, 24 | } 25 | -------------------------------------------------------------------------------- /optd-core/src/tests.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | pub(crate) mod common; 7 | pub(crate) mod heuristics_physical_property; 8 | -------------------------------------------------------------------------------- /optd-datafusion-bridge/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-datafusion-bridge" 3 | description = "datafusion bridge for optd" 4 | version = { workspace = true } 5 | edition = { workspace = true } 6 | homepage = { workspace = true } 7 | keywords = { workspace = true } 8 | license = { workspace = true } 9 | repository = { workspace = true } 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | datafusion = "43.0.0" 15 | datafusion-expr = "43.0.0" 16 | async-trait = "0.1" 17 | itertools = "0.13" 18 | optd-core = { path = "../optd-core", version = "0.1" } 19 | optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } 20 | optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" } 21 | anyhow = "1" 22 | async-recursion = "1" 23 | futures-lite = "2" 24 | futures-util = "0.3" 25 | tracing = "0.1" 26 | -------------------------------------------------------------------------------- /optd-datafusion-repr-adv-cost/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-datafusion-repr-adv-cost" 3 | description = "datafusion plan representation for optd" 4 | version = { workspace = true } 5 | edition = { workspace = true } 6 | homepage = { workspace = true } 7 | keywords = { workspace = true } 8 | license = { workspace = true } 9 | repository = { workspace = true } 10 | 11 | [dependencies] 12 | anyhow = "1" 13 | arrow-schema = "53.3.0" 14 | assert_approx_eq = "1.1.0" 15 | datafusion = "43.0.0" 16 | ordered-float = "4" 17 | optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } 18 | optd-core = { path = 
"../optd-core", version = "0.1" } 19 | serde = { version = "1.0", features = ["derive"] } 20 | rayon = "1.10" 21 | itertools = "0.13" 22 | test-case = "3.3" 23 | tracing = "0.1" 24 | optd-gungnir = { path = "../optd-gungnir", version = "0.1" } 25 | serde_with = { version = "3.7.0", features = ["json"] } 26 | -------------------------------------------------------------------------------- /optd-datafusion-repr-adv-cost/src/adv_stats/agg.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use optd_datafusion_repr::plan_nodes::{ArcDfPredNode, DfReprPredNode, ListPred}; 7 | use optd_datafusion_repr::properties::column_ref::{ 8 | BaseTableColumnRef, ColumnRef, GroupColumnRefs, 9 | }; 10 | use serde::de::DeserializeOwned; 11 | use serde::Serialize; 12 | 13 | use super::AdvStats; 14 | use crate::adv_stats::stats::{Distribution, MostCommonValues}; 15 | use crate::adv_stats::DEFAULT_NUM_DISTINCT; 16 | 17 | impl< 18 | M: MostCommonValues + Serialize + DeserializeOwned, 19 | D: Distribution + Serialize + DeserializeOwned, 20 | > AdvStats 21 | { 22 | pub(crate) fn get_agg_row_cnt( 23 | &self, 24 | group_by: ArcDfPredNode, 25 | output_col_refs: GroupColumnRefs, 26 | ) -> f64 { 27 | let group_by = ListPred::from_pred_node(group_by).unwrap(); 28 | if group_by.is_empty() { 29 | 1.0 30 | } else { 31 | // Multiply the n-distinct of all the group by columns. 
32 | // TODO: improve with multi-dimensional n-distinct 33 | output_col_refs 34 | .base_table_column_refs() 35 | .iter() 36 | .take(group_by.len()) 37 | .map(|col_ref| match col_ref { 38 | ColumnRef::BaseTableColumnRef(BaseTableColumnRef { table, col_idx }) => { 39 | let table_stats = self.per_table_stats_map.get(table); 40 | let column_stats = table_stats.and_then(|table_stats| { 41 | table_stats.column_comb_stats.get(&vec![*col_idx]) 42 | }); 43 | 44 | if let Some(column_stats) = column_stats { 45 | column_stats.ndistinct as f64 46 | } else { 47 | // The column type is not supported or stats are missing. 48 | DEFAULT_NUM_DISTINCT as f64 49 | } 50 | } 51 | ColumnRef::Derived => DEFAULT_NUM_DISTINCT as f64, 52 | _ => panic!( 53 | "GROUP BY base table column ref must either be derived or base table" 54 | ), 55 | }) 56 | .product() 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /optd-datafusion-repr-adv-cost/src/adv_stats/limit.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
impl<
        M: MostCommonValues + Serialize + DeserializeOwned,
        D: Distribution + Serialize + DeserializeOwned,
    > AdvStats<M, D>
{
    /// Estimates the output cardinality of a LIMIT node.
    ///
    /// `fetch_expr` must be a `ConstantPred` whose value is readable via
    /// `as_i64()`; panics (via `unwrap`) if it is not a constant. The child's
    /// estimated cardinality is capped at the fetch count.
    pub(crate) fn get_limit_row_cnt(&self, child_row_cnt: f64, fetch_expr: ArcDfPredNode) -> f64 {
        let fetch = ConstantPred::from_pred_node(fetch_expr)
            .unwrap()
            .value()
            .as_i64();
        // i64::MAX represents None (no fetch limit): pass the child's
        // cardinality through unchanged.
        // NOTE(review): the original comment said "u64::MAX represents None",
        // but the code compares against i64::MAX — confirm which sentinel the
        // producer of this predicate actually writes.
        if fetch == i64::MAX {
            child_row_cnt
        } else {
            child_row_cnt.min(fetch as f64)
        }
    }
}
| // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod adaptive_cost; 7 | pub mod base_cost; 8 | 9 | pub use adaptive_cost::{AdaptiveCostModel, RuntimeAdaptionStorage}; 10 | pub use base_cost::{DfCostModel, COMPUTE_COST, IO_COST}; 11 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/optimizer_ext.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use optd_core::nodes::PlanNodeOrGroup; 7 | use optd_core::optimizer::Optimizer; 8 | 9 | use crate::plan_nodes::DfNodeType; 10 | use crate::properties::column_ref::{ColumnRefPropertyBuilder, GroupColumnRefs}; 11 | use crate::properties::schema::{Schema, SchemaPropertyBuilder}; 12 | 13 | pub trait OptimizerExt: Optimizer { 14 | fn get_schema_of(&self, root_rel: PlanNodeOrGroup) -> Schema; 15 | fn get_column_ref_of(&self, root_rel: PlanNodeOrGroup) -> GroupColumnRefs; 16 | } 17 | 18 | impl> OptimizerExt for O { 19 | fn get_schema_of(&self, root_rel: PlanNodeOrGroup) -> Schema { 20 | self.get_logical_property::(root_rel, 0) 21 | } 22 | 23 | fn get_column_ref_of(&self, root_rel: PlanNodeOrGroup) -> GroupColumnRefs { 24 | self.get_logical_property::(root_rel, 1) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/agg.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use super::macros::define_plan_node; 7 | use super::predicates::ListPred; 8 | use super::{ArcDfPlanNode, DfNodeType, DfPlanNode, DfReprPlanNode}; 9 | 10 | #[derive(Clone, Debug)] 11 | pub struct LogicalAgg(pub ArcDfPlanNode); 12 | 13 | define_plan_node!( 14 | LogicalAgg : DfPlanNode, 15 | Agg, [ 16 | { 0, child: ArcDfPlanNode } 17 | ], [ 18 | { 0, exprs: ListPred }, 19 | { 1, groups: ListPred } 20 | ] 21 | ); 22 | 23 | #[derive(Clone, Debug)] 24 | pub struct PhysicalAgg(pub ArcDfPlanNode); 25 | 26 | define_plan_node!( 27 | PhysicalAgg : DfPlanNode, 28 | PhysicalAgg, [ 29 | { 0, child: ArcDfPlanNode } 30 | ], [ 31 | { 0, aggrs: ListPred }, 32 | { 1, groups: ListPred } 33 | ] 34 | ); 35 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/filter.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use super::macros::define_plan_node; 7 | use super::{ArcDfPlanNode, ArcDfPredNode, DfNodeType, DfPlanNode, DfReprPlanNode}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct LogicalFilter(pub ArcDfPlanNode); 11 | 12 | define_plan_node!( 13 | LogicalFilter : DfPlanNode, 14 | Filter, [ 15 | { 0, child: ArcDfPlanNode } 16 | ], [ 17 | { 0, cond: ArcDfPredNode } 18 | ] 19 | ); 20 | 21 | #[derive(Clone, Debug)] 22 | pub struct PhysicalFilter(pub ArcDfPlanNode); 23 | 24 | define_plan_node!( 25 | PhysicalFilter : DfPlanNode, 26 | PhysicalFilter, [ 27 | { 0, child: ArcDfPlanNode } 28 | ], [ 29 | { 0, cond: ArcDfPredNode } 30 | ] 31 | ); 32 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/join.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use core::fmt; 7 | use std::fmt::Display; 8 | 9 | use super::macros::define_plan_node; 10 | use super::{ArcDfPlanNode, ArcDfPredNode, DfNodeType, DfPlanNode, DfReprPlanNode, ListPred}; 11 | 12 | #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 13 | pub enum JoinType { 14 | Inner = 1, 15 | FullOuter, 16 | LeftOuter, 17 | RightOuter, 18 | LeftSemi, 19 | RightSemi, 20 | LeftAnti, 21 | RightAnti, 22 | LeftMark, 23 | } 24 | 25 | impl Display for JoinType { 26 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 27 | write!(f, "{:?}", self) 28 | } 29 | } 30 | 31 | #[derive(Clone, Debug)] 32 | pub struct LogicalJoin(pub ArcDfPlanNode); 33 | 34 | define_plan_node!( 35 | LogicalJoin : DfPlanNode, 36 | Join, [ 37 | { 0, left: ArcDfPlanNode }, 38 | { 1, right: ArcDfPlanNode } 39 | ], [ 40 | { 0, cond: ArcDfPredNode } 41 | ], { join_type: JoinType } 42 | ); 43 | 44 | #[derive(Clone, Debug)] 45 | pub struct PhysicalNestedLoopJoin(pub ArcDfPlanNode); 46 | 47 | define_plan_node!( 48 | PhysicalNestedLoopJoin : DfPlanNode, 49 | PhysicalNestedLoopJoin, [ 50 | { 0, left: ArcDfPlanNode }, 51 | { 1, right: ArcDfPlanNode } 52 | ], [ 53 | { 0, cond: ArcDfPredNode } 54 | ], { join_type: JoinType } 55 | ); 56 | 57 | #[derive(Clone, Debug)] 58 | pub struct PhysicalHashJoin(pub ArcDfPlanNode); 59 | 60 | define_plan_node!( 61 | PhysicalHashJoin : DfPlanNode, 62 | PhysicalHashJoin, [ 63 | { 0, left: ArcDfPlanNode }, 64 | { 1, right: ArcDfPlanNode } 65 | ], [ 66 | { 0, left_keys: ListPred }, 67 | { 1, right_keys: ListPred } 68 | ], { join_type: JoinType } 69 | ); 70 | 71 | impl LogicalJoin { 72 | /// Takes in left/right schema sizes, and maps a column index to be as if it 73 | /// were pushed down to the left or right side of a join accordingly. 
74 | pub fn map_through_join( 75 | col_idx: usize, 76 | left_schema_size: usize, 77 | right_schema_size: usize, 78 | ) -> usize { 79 | assert!(col_idx < left_schema_size + right_schema_size); 80 | if col_idx < left_schema_size { 81 | col_idx 82 | } else { 83 | col_idx - left_schema_size 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/limit.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use super::macros::define_plan_node; 7 | use super::{ArcDfPlanNode, ArcDfPredNode, DfNodeType, DfPlanNode, DfReprPlanNode}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct LogicalLimit(pub ArcDfPlanNode); 11 | 12 | define_plan_node!( 13 | LogicalLimit : DfPlanNode, 14 | Limit, [ 15 | { 0, child: ArcDfPlanNode } 16 | ], [ 17 | { 0, skip: ArcDfPredNode }, 18 | { 1, fetch: ArcDfPredNode } 19 | ] 20 | ); 21 | 22 | #[derive(Clone, Debug)] 23 | pub struct PhysicalLimit(pub ArcDfPlanNode); 24 | 25 | define_plan_node!( 26 | PhysicalLimit : DfPlanNode, 27 | PhysicalLimit, [ 28 | { 0, child: ArcDfPlanNode } 29 | ], [ 30 | { 0, skip: ArcDfPredNode }, 31 | { 1, fetch: ArcDfPredNode } 32 | ] 33 | ); 34 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/between_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use optd_core::nodes::PlanNodeMetaMap; 7 | use pretty_xmlish::Pretty; 8 | 9 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 10 | 11 | #[derive(Clone, Debug)] 12 | pub struct BetweenPred(pub ArcDfPredNode); 13 | 14 | impl BetweenPred { 15 | pub fn new(child: ArcDfPredNode, lower: ArcDfPredNode, upper: ArcDfPredNode) -> Self { 16 | BetweenPred( 17 | DfPredNode { 18 | typ: DfPredType::Between, 19 | children: vec![child, lower, upper], 20 | data: None, 21 | } 22 | .into(), 23 | ) 24 | } 25 | 26 | pub fn child(&self) -> ArcDfPredNode { 27 | self.0.child(0) 28 | } 29 | 30 | pub fn lower(&self) -> ArcDfPredNode { 31 | self.0.child(1) 32 | } 33 | 34 | pub fn upper(&self) -> ArcDfPredNode { 35 | self.0.child(2) 36 | } 37 | } 38 | 39 | impl DfReprPredNode for BetweenPred { 40 | fn into_pred_node(self) -> ArcDfPredNode { 41 | self.0 42 | } 43 | 44 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 45 | if !matches!(pred_node.typ, DfPredType::Between) { 46 | return None; 47 | } 48 | Some(Self(pred_node)) 49 | } 50 | 51 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 52 | Pretty::simple_record( 53 | "Between", 54 | vec![ 55 | ("child", self.child().explain(meta_map)), 56 | ("lower", self.lower().explain(meta_map)), 57 | ("upper", self.upper().explain(meta_map)), 58 | ], 59 | vec![], 60 | ) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/cast_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use arrow_schema::DataType; 7 | use optd_core::nodes::PlanNodeMetaMap; 8 | use pretty_xmlish::Pretty; 9 | 10 | use super::data_type_pred::DataTypePred; 11 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 12 | 13 | #[derive(Clone, Debug)] 14 | pub struct CastPred(pub ArcDfPredNode); 15 | 16 | impl CastPred { 17 | pub fn new(child: ArcDfPredNode, cast_to: DataType) -> Self { 18 | CastPred( 19 | DfPredNode { 20 | typ: DfPredType::Cast, 21 | children: vec![child, DataTypePred::new(cast_to).into_pred_node()], 22 | data: None, 23 | } 24 | .into(), 25 | ) 26 | } 27 | 28 | pub fn child(&self) -> ArcDfPredNode { 29 | self.0.child(0) 30 | } 31 | 32 | pub fn cast_to(&self) -> DataType { 33 | DataTypePred::from_pred_node(self.0.child(1)) 34 | .unwrap() 35 | .data_type() 36 | } 37 | } 38 | 39 | impl DfReprPredNode for CastPred { 40 | fn into_pred_node(self) -> ArcDfPredNode { 41 | self.0 42 | } 43 | 44 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 45 | if !matches!(pred_node.typ, DfPredType::Cast) { 46 | return None; 47 | } 48 | Some(Self(pred_node)) 49 | } 50 | 51 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 52 | Pretty::simple_record( 53 | "Cast", 54 | vec![ 55 | ("cast_to", format!("{}", self.cast_to()).into()), 56 | ("child", self.child().explain(meta_map)), 57 | ], 58 | vec![], 59 | ) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/column_ref_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use optd_core::nodes::{PlanNodeMetaMap, Value}; 7 | use pretty_xmlish::Pretty; 8 | 9 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 10 | 11 | #[derive(Clone, Debug)] 12 | pub struct ColumnRefPred(pub ArcDfPredNode); 13 | 14 | impl ColumnRefPred { 15 | /// Creates a new `ColumnRef` expression. 16 | pub fn new(column_idx: usize) -> ColumnRefPred { 17 | // this conversion is always safe since usize is at most u64 18 | let u64_column_idx = column_idx as u64; 19 | ColumnRefPred( 20 | DfPredNode { 21 | typ: DfPredType::ColumnRef, 22 | children: vec![], 23 | data: Some(Value::UInt64(u64_column_idx)), 24 | } 25 | .into(), 26 | ) 27 | } 28 | 29 | fn get_data_usize(&self) -> usize { 30 | self.0.data.as_ref().unwrap().as_u64() as usize 31 | } 32 | 33 | /// Gets the column index. 34 | pub fn index(&self) -> usize { 35 | self.get_data_usize() 36 | } 37 | } 38 | 39 | impl DfReprPredNode for ColumnRefPred { 40 | fn into_pred_node(self) -> ArcDfPredNode { 41 | self.0 42 | } 43 | 44 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 45 | if pred_node.typ != DfPredType::ColumnRef { 46 | return None; 47 | } 48 | Some(Self(pred_node)) 49 | } 50 | 51 | fn explain(&self, _meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 52 | Pretty::display(&format!("#{}", self.index())) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/data_type_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use arrow_schema::DataType; 7 | use optd_core::nodes::PlanNodeMetaMap; 8 | use pretty_xmlish::Pretty; 9 | 10 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 11 | 12 | #[derive(Clone, Debug)] 13 | pub struct DataTypePred(pub ArcDfPredNode); 14 | 15 | impl DataTypePred { 16 | pub fn new(typ: DataType) -> Self { 17 | DataTypePred( 18 | DfPredNode { 19 | typ: DfPredType::DataType(typ), 20 | children: vec![], 21 | data: None, 22 | } 23 | .into(), 24 | ) 25 | } 26 | 27 | pub fn data_type(&self) -> DataType { 28 | if let DfPredType::DataType(ref data_type) = self.0.typ { 29 | data_type.clone() 30 | } else { 31 | panic!("not a data type") 32 | } 33 | } 34 | } 35 | 36 | impl DfReprPredNode for DataTypePred { 37 | fn into_pred_node(self) -> ArcDfPredNode { 38 | self.0 39 | } 40 | 41 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 42 | if !matches!(pred_node.typ, DfPredType::DataType(_)) { 43 | return None; 44 | } 45 | Some(Self(pred_node)) 46 | } 47 | 48 | fn explain(&self, _meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 49 | Pretty::display(&self.data_type().to_string()) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/extern_column_ref_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use optd_core::nodes::{PlanNodeMetaMap, Value}; 7 | use pretty_xmlish::Pretty; 8 | 9 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 10 | 11 | #[derive(Clone, Debug)] 12 | pub struct ExternColumnRefPred(pub ArcDfPredNode); 13 | 14 | impl ExternColumnRefPred { 15 | /// Creates a new `DepExternColumnRef` expression. 
16 | pub fn new(column_idx: usize) -> ExternColumnRefPred { 17 | // this conversion is always safe since usize is at most u64 18 | let u64_column_idx = column_idx as u64; 19 | ExternColumnRefPred( 20 | DfPredNode { 21 | typ: DfPredType::ExternColumnRef, 22 | children: vec![], 23 | data: Some(Value::UInt64(u64_column_idx)), 24 | } 25 | .into(), 26 | ) 27 | } 28 | 29 | fn get_data_usize(&self) -> usize { 30 | self.0.data.as_ref().unwrap().as_u64() as usize 31 | } 32 | 33 | /// Gets the column index. 34 | pub fn index(&self) -> usize { 35 | self.get_data_usize() 36 | } 37 | } 38 | 39 | impl DfReprPredNode for ExternColumnRefPred { 40 | fn into_pred_node(self) -> ArcDfPredNode { 41 | self.0 42 | } 43 | 44 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 45 | if pred_node.typ != DfPredType::ExternColumnRef { 46 | return None; 47 | } 48 | Some(Self(pred_node)) 49 | } 50 | 51 | fn explain(&self, _meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 52 | Pretty::display(&format!("Extern(#{})", self.index())) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/in_list_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use optd_core::nodes::{PlanNodeMetaMap, Value}; 7 | use pretty_xmlish::Pretty; 8 | 9 | use super::ListPred; 10 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 11 | 12 | #[derive(Clone, Debug)] 13 | pub struct InListPred(pub ArcDfPredNode); 14 | 15 | impl InListPred { 16 | pub fn new(child: ArcDfPredNode, list: ListPred, negated: bool) -> Self { 17 | InListPred( 18 | DfPredNode { 19 | typ: DfPredType::InList, 20 | children: vec![child, list.into_pred_node()], 21 | data: Some(Value::Bool(negated)), 22 | } 23 | .into(), 24 | ) 25 | } 26 | 27 | pub fn child(&self) -> ArcDfPredNode { 28 | self.0.child(0) 29 | } 30 | 31 | pub fn list(&self) -> ListPred { 32 | ListPred::from_pred_node(self.0.child(1)).unwrap() 33 | } 34 | 35 | /// `true` for `NOT IN`. 36 | pub fn negated(&self) -> bool { 37 | self.0.data.as_ref().unwrap().as_bool() 38 | } 39 | } 40 | 41 | impl DfReprPredNode for InListPred { 42 | fn into_pred_node(self) -> ArcDfPredNode { 43 | self.0 44 | } 45 | 46 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 47 | if !matches!(pred_node.typ, DfPredType::InList) { 48 | return None; 49 | } 50 | Some(Self(pred_node)) 51 | } 52 | 53 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 54 | Pretty::simple_record( 55 | "InList", 56 | vec![ 57 | ("expr", self.child().explain(meta_map)), 58 | ("list", self.list().explain(meta_map)), 59 | ("negated", self.negated().to_string().into()), 60 | ], 61 | vec![], 62 | ) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/list_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
/// A variadic list of predicate expressions, stored as the children of a
/// single `DfPredType::List` node.
#[derive(Clone, Debug)]
pub struct ListPred(pub ArcDfPredNode);

impl ListPred {
    /// Creates a new list predicate whose elements are `preds`, in order.
    pub fn new(preds: Vec<ArcDfPredNode>) -> Self {
        ListPred(
            DfPredNode {
                typ: DfPredType::List,
                children: preds,
                data: None,
            }
            .into(),
        )
    }

    /// Gets number of expressions in the list
    pub fn len(&self) -> usize {
        self.0.children.len()
    }

    /// Returns `true` if the list contains no expressions.
    pub fn is_empty(&self) -> bool {
        self.0.children.is_empty()
    }

    /// Gets the `idx`-th expression in the list.
    pub fn child(&self, idx: usize) -> ArcDfPredNode {
        self.0.child(idx)
    }

    /// Returns a (cloned) `Vec` of all expressions in the list.
    pub fn to_vec(&self) -> Vec<ArcDfPredNode> {
        self.0.children.clone()
    }
}

impl DfReprPredNode for ListPred {
    fn into_pred_node(self) -> ArcDfPredNode {
        self.0
    }

    fn from_pred_node(pred_node: ArcDfPredNode) -> Option<Self> {
        if pred_node.typ != DfPredType::List {
            return None;
        }
        Some(Self(pred_node))
    }

    /// Renders the list as a pretty-printed array of its elements' explains.
    fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> {
        Pretty::Array(
            (0..self.len())
                .map(|x| self.child(x).explain(meta_map))
                .collect_vec(),
        )
    }
}
5 | 6 | use std::fmt::Display; 7 | 8 | use optd_core::nodes::PlanNodeMetaMap; 9 | use pretty_xmlish::Pretty; 10 | 11 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 12 | 13 | #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 14 | pub enum SortOrderType { 15 | Asc, 16 | Desc, 17 | } 18 | 19 | impl Display for SortOrderType { 20 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 21 | write!(f, "{:?}", self) 22 | } 23 | } 24 | 25 | #[derive(Clone, Debug)] 26 | pub struct SortOrderPred(pub ArcDfPredNode); 27 | 28 | impl SortOrderPred { 29 | pub fn new(order: SortOrderType, child: ArcDfPredNode) -> Self { 30 | SortOrderPred( 31 | DfPredNode { 32 | typ: DfPredType::SortOrder(order), 33 | children: vec![child], 34 | data: None, 35 | } 36 | .into(), 37 | ) 38 | } 39 | 40 | pub fn child(&self) -> ArcDfPredNode { 41 | self.0.child(0) 42 | } 43 | 44 | pub fn order(&self) -> SortOrderType { 45 | if let DfPredType::SortOrder(order) = self.0.typ { 46 | order 47 | } else { 48 | panic!("not a sort order expr") 49 | } 50 | } 51 | } 52 | 53 | impl DfReprPredNode for SortOrderPred { 54 | fn into_pred_node(self) -> ArcDfPredNode { 55 | self.0 56 | } 57 | 58 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 59 | if !matches!(pred_node.typ, DfPredType::SortOrder(_)) { 60 | return None; 61 | } 62 | Some(Self(pred_node)) 63 | } 64 | 65 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 66 | Pretty::simple_record( 67 | "SortOrder", 68 | vec![("order", self.order().to_string().into())], 69 | vec![self.child().explain(meta_map)], 70 | ) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/un_op_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that 
can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use std::fmt::Display; 7 | 8 | use optd_core::nodes::PlanNodeMetaMap; 9 | use pretty_xmlish::Pretty; 10 | 11 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 12 | 13 | #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 14 | pub enum UnOpType { 15 | Neg = 1, 16 | Not, 17 | } 18 | 19 | impl Display for UnOpType { 20 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 21 | write!(f, "{:?}", self) 22 | } 23 | } 24 | 25 | #[derive(Clone, Debug)] 26 | pub struct UnOpPred(pub ArcDfPredNode); 27 | 28 | impl UnOpPred { 29 | pub fn new(child: ArcDfPredNode, op_type: UnOpType) -> Self { 30 | UnOpPred( 31 | DfPredNode { 32 | typ: DfPredType::UnOp(op_type), 33 | children: vec![child], 34 | data: None, 35 | } 36 | .into(), 37 | ) 38 | } 39 | 40 | pub fn child(&self) -> ArcDfPredNode { 41 | self.0.child(0) 42 | } 43 | 44 | pub fn op_type(&self) -> UnOpType { 45 | if let DfPredType::UnOp(op_type) = self.0.typ { 46 | op_type 47 | } else { 48 | panic!("not a un op") 49 | } 50 | } 51 | } 52 | 53 | impl DfReprPredNode for UnOpPred { 54 | fn into_pred_node(self) -> ArcDfPredNode { 55 | self.0 56 | } 57 | 58 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 59 | if !matches!(pred_node.typ, DfPredType::UnOp(_)) { 60 | return None; 61 | } 62 | Some(Self(pred_node)) 63 | } 64 | 65 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 66 | Pretty::simple_record( 67 | self.op_type().to_string(), 68 | vec![], 69 | vec![self.child().explain(meta_map)], 70 | ) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/projection.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be 
found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use super::macros::define_plan_node; 7 | use super::{ArcDfPlanNode, DfNodeType, DfPlanNode, DfReprPlanNode, ListPred}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct LogicalProjection(pub ArcDfPlanNode); 11 | 12 | define_plan_node!( 13 | LogicalProjection : DfPlanNode, 14 | Projection, [ 15 | { 0, child: ArcDfPlanNode } 16 | ], [ 17 | { 0, exprs: ListPred } 18 | ] 19 | ); 20 | 21 | #[derive(Clone, Debug)] 22 | pub struct PhysicalProjection(pub ArcDfPlanNode); 23 | 24 | define_plan_node!( 25 | PhysicalProjection : DfPlanNode, 26 | PhysicalProjection, [ 27 | { 0, child: ArcDfPlanNode } 28 | ], [ 29 | { 0, exprs: ListPred } 30 | ] 31 | ); 32 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/scan.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use std::sync::Arc; 7 | 8 | use optd_core::nodes::PlanNodeMetaMap; 9 | use pretty_xmlish::Pretty; 10 | 11 | use super::{ArcDfPlanNode, ConstantPred, DfNodeType, DfPlanNode, DfReprPlanNode, DfReprPredNode}; 12 | use crate::explain::Insertable; 13 | 14 | #[derive(Clone, Debug)] 15 | pub struct LogicalScan(pub ArcDfPlanNode); 16 | 17 | impl DfReprPlanNode for LogicalScan { 18 | fn into_plan_node(self) -> ArcDfPlanNode { 19 | self.0 20 | } 21 | 22 | fn from_plan_node(plan_node: ArcDfPlanNode) -> Option { 23 | if plan_node.typ != DfNodeType::Scan { 24 | return None; 25 | } 26 | Some(Self(plan_node)) 27 | } 28 | 29 | fn explain(&self, _meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 30 | Pretty::childless_record( 31 | "LogicalScan", 32 | vec![("table", self.table().to_string().into())], 33 | ) 34 | } 35 | } 36 | 37 | impl LogicalScan { 38 | pub fn new(table: String) -> LogicalScan { 39 | LogicalScan( 40 | DfPlanNode { 41 | typ: DfNodeType::Scan, 42 | children: vec![], 43 | predicates: vec![ConstantPred::string(table).into_pred_node()], 44 | } 45 | .into(), 46 | ) 47 | } 48 | 49 | pub fn table(&self) -> Arc { 50 | ConstantPred::from_pred_node(self.0.predicates.first().unwrap().clone()) 51 | .unwrap() 52 | .value() 53 | .as_str() 54 | } 55 | } 56 | 57 | #[derive(Clone, Debug)] 58 | pub struct PhysicalScan(pub ArcDfPlanNode); 59 | 60 | impl DfReprPlanNode for PhysicalScan { 61 | fn into_plan_node(self) -> ArcDfPlanNode { 62 | self.0 63 | } 64 | 65 | fn from_plan_node(plan_node: ArcDfPlanNode) -> Option { 66 | if plan_node.typ != DfNodeType::PhysicalScan { 67 | return None; 68 | } 69 | Some(Self(plan_node)) 70 | } 71 | 72 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 73 | let mut fields = vec![("table", self.table().to_string().into())]; 74 | if let Some(meta_map) = meta_map { 75 | fields = fields.with_meta(self.0.get_meta(meta_map)); 76 | } 77 | Pretty::childless_record("PhysicalScan", fields) 78 | } 79 | } 80 | 81 | impl 
PhysicalScan { 82 | pub fn table(&self) -> Arc { 83 | ConstantPred::from_pred_node(self.0.predicates.first().unwrap().clone()) 84 | .unwrap() 85 | .value() 86 | .as_str() 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/sort.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use super::macros::define_plan_node; 7 | use super::{ArcDfPlanNode, DfNodeType, DfPlanNode, DfReprPlanNode, ListPred}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct LogicalSort(pub ArcDfPlanNode); 11 | 12 | // each expression in ExprList is represented as a SortOrderExpr 13 | // 1. nulls_first is not included from DF 14 | // 2. node type defines sort order per expression 15 | // 3. actual expr is stored as a child of this node 16 | define_plan_node!( 17 | LogicalSort : DfPlanNode, 18 | Sort, [ 19 | { 0, child: ArcDfPlanNode } 20 | ], [ 21 | { 0, exprs: ListPred } 22 | ] 23 | ); 24 | 25 | #[derive(Clone, Debug)] 26 | pub struct PhysicalSort(pub ArcDfPlanNode); 27 | 28 | define_plan_node!( 29 | PhysicalSort : DfPlanNode, 30 | PhysicalSort, [ 31 | { 0, child: ArcDfPlanNode } 32 | ], [ 33 | { 0, exprs: ListPred } 34 | ] 35 | ); 36 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/subquery.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
use core::fmt;
use std::fmt::Display;

use super::macros::define_plan_node;
use super::{
    ArcDfPlanNode, ArcDfPredNode, BinOpType, DfNodeType, DfPlanNode, DfPredNode, DfReprPlanNode,
    ListPred,
};

/// These are the only three fundamental types of subqueries.
/// Refer to the Unnesting Arbitrary Queries talk by Mark Raasveldt for
/// info on how to translate other subquery types to these three.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SubqueryType {
    // Subquery producing a single scalar value.
    Scalar,
    // EXISTS (...) subquery.
    Exists,
    // ANY/IN-style subquery: `pred op ANY (...)`.
    Any { pred: DfPredNode, op: BinOpType },
}

impl Display for SubqueryType {
    // Renders via the Debug representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self)
    }
}

/// Dependent join as initially produced from a subquery, before unnesting;
/// carries the subquery kind (`sq_type`) alongside cond and extern columns.
#[derive(Clone, Debug)]
pub struct RawDependentJoin(pub ArcDfPlanNode);

define_plan_node!(
    RawDependentJoin : DfReprPlanNode,
    RawDepJoin, [
        { 0, left: ArcDfPlanNode },
        { 1, right: ArcDfPlanNode }
    ], [
        { 0, cond: ArcDfPredNode },
        { 1, extern_cols: ListPred }
    ], { sq_type: SubqueryType }
);

/// Dependent join after the initial rewrite; same shape minus `sq_type`.
#[derive(Clone, Debug)]
pub struct DependentJoin(pub ArcDfPlanNode);

define_plan_node!(
    DependentJoin : DfReprPlanNode,
    DepJoin, [
        { 0, left: ArcDfPlanNode },
        { 1, right: ArcDfPlanNode }
    ], [
        { 0, cond: ArcDfPredNode },
        { 1, extern_cols: ListPred }
    ]
);
-------------------------------------------------------------------------------- /optd-datafusion-repr/src/properties.rs: --------------------------------------------------------------------------------
// Copyright (c) 2023-2024 CMU Database Group
//
// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.
pub mod column_ref;
pub mod schema;

// Fallback name for otherwise-unnamed columns/fields — presumably used by the
// property modules above; TODO confirm usage sites.
const DEFAULT_NAME: &str = "unnamed";
-------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules.rs: --------------------------------------------------------------------------------
// Copyright (c) 2023-2024 CMU Database Group
//
// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.

mod eliminate_duplicated_expr;
mod eliminate_limit;
mod filter;
mod filter_pushdown;
mod joins;
mod macros;
mod physical;
mod project_transpose;
mod subquery;

// Re-export the rule types; `physical` and `subquery` export a curated subset
// rather than a glob.
pub use eliminate_duplicated_expr::*;
pub use eliminate_limit::*;
pub use filter::*;
pub use filter_pushdown::*;
pub use joins::*;
pub use physical::PhysicalConversionRule;
pub use project_transpose::*;
pub use subquery::{
    DepInitialDistinct, DepJoinEliminate, DepJoinPastAgg, DepJoinPastFilter, DepJoinPastProj,
};
-------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules/eliminate_limit.rs: --------------------------------------------------------------------------------
// Copyright (c) 2023-2024 CMU Database Group
//
// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.
5 | 6 | use optd_core::nodes::PlanNodeOrGroup; 7 | use optd_core::optimizer::Optimizer; 8 | use optd_core::rules::{Rule, RuleMatcher}; 9 | 10 | use super::macros::define_rule; 11 | use crate::plan_nodes::{ 12 | ArcDfPlanNode, ConstantPred, ConstantType, DfNodeType, DfPredType, DfReprPlanNode, 13 | DfReprPredNode, LogicalEmptyRelation, LogicalLimit, 14 | }; 15 | use crate::OptimizerExt; 16 | 17 | define_rule!(EliminateLimitRule, apply_eliminate_limit, (Limit, child)); 18 | 19 | /// Transformations: 20 | /// - Limit with skip 0 and no fetch -> Eliminate from the tree 21 | /// - Limit with limit 0 -> EmptyRelation 22 | fn apply_eliminate_limit( 23 | optimizer: &impl Optimizer, 24 | binding: ArcDfPlanNode, 25 | ) -> Vec> { 26 | let limit = LogicalLimit::from_plan_node(binding).unwrap(); 27 | let skip = limit.skip(); 28 | let fetch = limit.fetch(); 29 | let child = limit.child(); 30 | if let DfPredType::Constant(ConstantType::Int64) = skip.typ { 31 | if let DfPredType::Constant(ConstantType::Int64) = fetch.typ { 32 | let skip_val = ConstantPred::from_pred_node(skip).unwrap().value().as_i64(); 33 | 34 | let fetch_val = ConstantPred::from_pred_node(fetch) 35 | .unwrap() 36 | .value() 37 | .as_i64(); 38 | 39 | // Bad convention to have u64 max represent None 40 | let fetch_is_none = fetch_val == i64::MAX; 41 | 42 | let schema = optimizer.get_schema_of(child.clone()); 43 | if fetch_is_none && skip_val == 0 { 44 | return vec![child]; 45 | } else if fetch_val == 0 { 46 | let node = LogicalEmptyRelation::new(false, schema); 47 | return vec![node.into_plan_node().into()]; 48 | } 49 | } 50 | } 51 | vec![] 52 | } 53 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules/project_transpose.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the 
LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod project_filter_transpose; 7 | pub mod project_join_transpose; 8 | pub mod project_merge; 9 | pub mod project_transpose_common; 10 | 11 | pub use project_filter_transpose::*; 12 | pub use project_join_transpose::*; 13 | pub use project_merge::*; 14 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use std::vec; 7 | 8 | use optd_core::nodes::PlanNodeOrGroup; 9 | use optd_core::optimizer::Optimizer; 10 | use optd_core::rules::RuleMatcher; 11 | 12 | use super::project_transpose_common::ProjectionMapping; 13 | use crate::plan_nodes::{ 14 | ArcDfPlanNode, ColumnRefPred, DfNodeType, DfReprPlanNode, DfReprPredNode, JoinType, ListPred, 15 | LogicalJoin, LogicalProjection, 16 | }; 17 | use crate::rules::macros::define_rule; 18 | use crate::{OptimizerExt, Rule}; 19 | 20 | // (Proj A) join B -> (Proj (A join B)) 21 | define_rule!( 22 | ProjectionPullUpJoin, 23 | apply_projection_pull_up_join, 24 | (Join(JoinType::Inner), (Projection, left), right) 25 | ); 26 | 27 | fn apply_projection_pull_up_join( 28 | optimizer: &impl Optimizer, 29 | binding: ArcDfPlanNode, 30 | ) -> Vec> { 31 | let join = LogicalJoin::from_plan_node(binding).unwrap(); 32 | let projection = LogicalProjection::from_plan_node(join.left().unwrap_plan_node()).unwrap(); 33 | let left = projection.child(); 34 | let right = join.right(); 35 | let list = projection.exprs(); 36 | let cond = join.cond(); 37 | 38 | let projection = LogicalProjection::new_unchecked(left.clone(), list.clone()); 39 | 40 | let Some(mapping) = 
ProjectionMapping::build(&projection.exprs()) else { 41 | return vec![]; 42 | }; 43 | 44 | // TODO(chi): support capture projection node. 45 | let left_schema = optimizer.get_schema_of(left.clone()); 46 | let right_schema = optimizer.get_schema_of(right.clone()); 47 | let mut new_projection_exprs = list.to_vec(); 48 | for i in 0..right_schema.len() { 49 | let col = ColumnRefPred::new(i + left_schema.len()).into_pred_node(); 50 | new_projection_exprs.push(col); 51 | } 52 | let node = LogicalProjection::new( 53 | LogicalJoin::new_unchecked( 54 | left, 55 | right, 56 | mapping.rewrite_join_cond(cond, left_schema.len()), 57 | JoinType::Inner, 58 | ) 59 | .into_plan_node(), 60 | ListPred::new(new_projection_exprs), 61 | ); 62 | vec![node.into_plan_node().into()] 63 | } 64 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules/subquery.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod depjoin_pushdown; 7 | 8 | pub use depjoin_pushdown::{ 9 | DepInitialDistinct, DepJoinEliminate, DepJoinPastAgg, DepJoinPastFilter, DepJoinPastProj, 10 | }; 11 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/testing.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | mod dummy_cost; 7 | mod tpch_catalog; 8 | 9 | use std::sync::Arc; 10 | 11 | use optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer, HeuristicsOptimizerOptions}; 12 | use optd_core::rules::Rule; 13 | 14 | use self::tpch_catalog::TpchCatalog; 15 | use crate::plan_nodes::DfNodeType; 16 | use crate::properties::schema::SchemaPropertyBuilder; 17 | 18 | /// Create a "dummy" optimizer preloaded with the TPC-H catalog for testing 19 | /// Note: Only provides the schema property currently 20 | pub fn new_test_optimizer( 21 | rule: Arc>>, 22 | ) -> HeuristicsOptimizer { 23 | let dummy_catalog = Arc::new(TpchCatalog); 24 | 25 | HeuristicsOptimizer::new_with_rules( 26 | vec![rule], 27 | HeuristicsOptimizerOptions { 28 | apply_order: ApplyOrder::TopDown, 29 | enable_physical_prop_passthrough: true, 30 | }, 31 | Arc::new([Box::new(SchemaPropertyBuilder::new(dummy_catalog))]), 32 | Arc::new([]), 33 | ) 34 | } 35 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/testing/dummy_cost.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use optd_core::cascades::{CascadesOptimizer, NaiveMemo, RelNodeContext}; 7 | use optd_core::cost::{Cost, CostModel, Statistics}; 8 | 9 | use crate::plan_nodes::{ArcDfPredNode, DfNodeType}; 10 | 11 | /// Dummy cost model that returns a 0 cost in all cases. 12 | /// Intended for testing with the cascades optimizer. 
pub struct DummyCostModel;

// NOTE(review): generic parameters (`CostModel<DfNodeType, NaiveMemo<DfNodeType>>`,
// `CascadesOptimizer<DfNodeType>`) restored — stripped by text extraction;
// verify against upstream.
impl CostModel<DfNodeType, NaiveMemo<DfNodeType>> for DummyCostModel {
    /// Compute the cost of a single operation
    // Note: every operation costs 1.0 (not literally zero, despite the
    // file-header comment) so accumulated plan costs still order by node count.
    fn compute_operation_cost(
        &self,
        _: &DfNodeType,
        _: &[ArcDfPredNode],
        _: &[Option<&Statistics>],
        _: RelNodeContext,
        _: &CascadesOptimizer<DfNodeType>,
    ) -> Cost {
        Cost(vec![1.0])
    }

    /// Derive the statistics of a single operation
    fn derive_statistics(
        &self,
        _: &DfNodeType,
        _: &[ArcDfPredNode],
        _: &[&Statistics],
        _: RelNodeContext,
        _: &CascadesOptimizer<DfNodeType>,
    ) -> Statistics {
        // No real statistics: an opaque unit value.
        Statistics(Box::new(()))
    }

    fn explain_cost(&self, _: &Cost) -> String {
        "dummy_cost".to_string()
    }

    fn explain_statistics(&self, _: &Statistics) -> String {
        "dummy_statistics".to_string()
    }

    fn weighted_cost(&self, cost: &Cost) -> f64 {
        // Single-dimension cost vector.
        cost.0[0]
    }

    fn accumulate(&self, total_cost: &mut Cost, cost: &Cost) {
        total_cost.0[0] += cost.0[0];
    }

    fn zero(&self) -> Cost {
        Cost(vec![0.0])
    }
}
-------------------------------------------------------------------------------- /optd-gungnir/Cargo.toml: --------------------------------------------------------------------------------
[package]
name = "optd-gungnir"
description = "statistics for optd"
version = { workspace = true }
edition = { workspace = true }
homepage = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
repository = { workspace = true }

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
itertools = "0.13"
rand = "0.8"
crossbeam = "0.8"
lazy_static = "1.4"
serde = { version = "1.0", features = ["derive"] }
serde_with = { version = "3.7.0", features = ["json"] }
ordered-float = "4"
optd-core = { path = "../optd-core", version = "0.1.0"
} 22 | hashbrown = { version = "0.14", features = ["serde"] } 23 | murmur2 = "0.1" 24 | -------------------------------------------------------------------------------- /optd-gungnir/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | #![allow(clippy::new_without_default)] 7 | 8 | pub mod stats; 9 | pub mod utils; 10 | -------------------------------------------------------------------------------- /optd-gungnir/src/stats.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod counter; 7 | pub mod hyperloglog; 8 | pub mod misragries; 9 | pub mod murmur2; 10 | pub mod tdigest; 11 | -------------------------------------------------------------------------------- /optd-gungnir/src/stats/murmur2.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | //! Implementation of the MurmurHash2 function, for 64b outputs, by Austin Appleby (2008). 7 | //! Note: Assumes little-endian machines. 8 | 9 | /// Returns the MurmurHash2 (u64) given a stream of bytes and a seed. 10 | pub fn murmur_hash(bytes: &[u8], seed: u64) -> u64 { 11 | murmur2::murmur64a(bytes, seed) 12 | } 13 | 14 | // Start of unit testing section. 
15 | #[cfg(test)] 16 | mod tests { 17 | use super::murmur_hash; 18 | #[test] 19 | fn murmur_string() { 20 | assert_eq!( 21 | murmur_hash("Hyper🪵🪵 Rules!".as_bytes(), 1257851387), 22 | 1623602735526180105 23 | ); 24 | assert_eq!( 25 | murmur_hash( 26 | "All work and no play makes Jack a dull boy".as_bytes(), 27 | 1111111111 28 | ), 29 | 1955247671966919985 30 | ); 31 | assert_eq!(murmur_hash("".as_bytes(), 0), 0); 32 | assert_eq!( 33 | murmur_hash("Gungnir™".as_bytes(), 4242424242), 34 | 13329505761566523763 35 | ); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /optd-gungnir/src/utils.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod arith_encoder; 7 | -------------------------------------------------------------------------------- /optd-perfbench/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-perfbench" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | datafusion = { version = "43.0.0", features = [ 10 | "avro", 11 | "crypto_expressions", 12 | "encoding_expressions", 13 | "regex_expressions", 14 | "unicode_expressions", 15 | "compression", 16 | "serde", 17 | ] } 18 | optd-datafusion-repr = { path = "../optd-datafusion-repr" } 19 | optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost" } 20 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge" } 21 | datafusion-optd-cli = { path = "../datafusion-optd-cli" } 22 | futures = "0.3" 23 | anyhow = { version = "1", features = ["backtrace"] } 24 | async-trait = "0.1" 25 | tokio = { 
version = "1.24", features = [ 26 | "macros", 27 | "rt", 28 | "rt-multi-thread", 29 | "sync", 30 | "parking_lot", 31 | ] } 32 | shlex = "1.3" 33 | tokio-postgres = "0.7" 34 | regex = "1.10" 35 | clap = { version = "4.5.4", features = ["derive"] } 36 | log = "0.4" 37 | env_logger = "0.11" 38 | lazy_static = "1.4.0" 39 | prettytable-rs = "0.10" 40 | serde = "1.0" 41 | serde_json = "1.0" 42 | test-case = "3.3" 43 | rayon = "1.10" 44 | parquet = "53.3.0" 45 | csv2parquet = { git = "https://github.com/skyzh/arrow-tools.git", branch = "main" } 46 | 47 | [dev-dependencies] 48 | assert_cmd = "2.0" 49 | -------------------------------------------------------------------------------- /optd-perfbench/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod benchmark; 7 | pub mod cardbench; 8 | mod datafusion_dbms; 9 | pub mod job; 10 | mod postgres_dbms; 11 | pub mod shell; 12 | pub mod tpch; 13 | mod truecard; 14 | -------------------------------------------------------------------------------- /optd-perfbench/src/truecard.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use std::collections::HashMap; 7 | use std::fs::{self, File}; 8 | use std::path::{Path, PathBuf}; 9 | 10 | use async_trait::async_trait; 11 | 12 | use crate::benchmark::Benchmark; 13 | 14 | #[async_trait] 15 | pub trait TruecardGetter { 16 | async fn get_benchmark_truecards( 17 | &mut self, 18 | benchmark: &Benchmark, 19 | ) -> anyhow::Result>; 20 | } 21 | 22 | /// A cache that gets persisted to disk for the true cardinalities of all queries of all benchmarks 23 | pub struct TruecardCache { 24 | truecard_cache_fpath: PathBuf, 25 | cache: HashMap>, 26 | } 27 | 28 | impl TruecardCache { 29 | pub fn build>(truecard_cache_fpath: P) -> anyhow::Result { 30 | let truecard_cache_fpath = PathBuf::from(truecard_cache_fpath.as_ref()); 31 | let cache = if truecard_cache_fpath.exists() { 32 | let file = File::open(&truecard_cache_fpath)?; 33 | serde_json::from_reader(file)? 34 | } else { 35 | HashMap::new() 36 | }; 37 | 38 | Ok(Self { 39 | truecard_cache_fpath, 40 | cache, 41 | }) 42 | } 43 | 44 | pub fn insert_truecard( 45 | &mut self, 46 | data_and_queries_name: &str, 47 | query_id: &str, 48 | truecard: usize, 49 | ) { 50 | let db_cache = match self.cache.get_mut(data_and_queries_name) { 51 | Some(db_cache) => db_cache, 52 | None => { 53 | self.cache 54 | .insert(String::from(data_and_queries_name), HashMap::new()); 55 | self.cache.get_mut(data_and_queries_name).unwrap() 56 | } 57 | }; 58 | db_cache.insert(String::from(query_id), truecard); 59 | } 60 | 61 | pub fn get_truecard(&self, data_and_queries_name: &str, query_id: &str) -> Option { 62 | self.cache 63 | .get(data_and_queries_name) 64 | .and_then(|db_cache| db_cache.get(query_id).copied()) 65 | } 66 | 67 | pub fn save(&self) -> anyhow::Result<()> { 68 | fs::create_dir_all(self.truecard_cache_fpath.parent().unwrap())?; 69 | // this will create a new file or truncate the file if it already exists 70 | let file = File::create(&self.truecard_cache_fpath)?; 71 | serde_json::to_writer_pretty(file, &self.cache)?; 72 | 
Ok(()) 73 | } 74 | } 75 | 76 | impl Drop for TruecardCache { 77 | fn drop(&mut self) { 78 | self.save().unwrap(); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /optd-sqllogictest/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-sqllogictest" 3 | description = "sqllogictest for optd" 4 | version.workspace = true 5 | edition.workspace = true 6 | homepage.workspace = true 7 | keywords.workspace = true 8 | license.workspace = true 9 | repository.workspace = true 10 | 11 | [dependencies] 12 | thiserror = "2" 13 | sqllogictest = "0.22" 14 | clap = { version = "4.5.4", features = ["derive"] } 15 | anyhow = { version = "1", features = ["backtrace"] } 16 | async-trait = "0.1" 17 | datafusion-optd-cli = { path = "../datafusion-optd-cli", version = "43.0.0" } 18 | optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" } 19 | datafusion = { version = "43.0.0", features = [ 20 | "avro", 21 | "crypto_expressions", 22 | "encoding_expressions", 23 | "regex_expressions", 24 | "unicode_expressions", 25 | "compression", 26 | ] } 27 | env_logger = "0.9" 28 | mimalloc = { version = "0.1", default-features = false } 29 | regex = "1.8" 30 | tokio = { version = "1.24", features = [ 31 | "macros", 32 | "rt", 33 | "rt-multi-thread", 34 | "sync", 35 | "parking_lot", 36 | ] } 37 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } 38 | optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } 39 | itertools = "0.13" 40 | lazy_static = "1.4.0" 41 | 42 | [[test]] 43 | name = "harness" 44 | path = "./tests/harness.rs" 45 | harness = false 46 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/_basic_tables.slt.part: -------------------------------------------------------------------------------- 1 | statement ok 2 | create table 
t1(v1 int, v2 int); 3 | 4 | statement ok 5 | create table t2(v3 int, v4 int); 6 | 7 | statement ok 8 | insert into t1 values (1, 100), (2, 200), (2, 250), (3, 300), (3, 300); 9 | 10 | statement ok 11 | insert into t2 values (2, 200), (2, 250), (3, 300); 12 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/basic.slt: -------------------------------------------------------------------------------- 1 | query I 2 | select 1; 3 | ---- 4 | 1 5 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q1.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | l_returnflag, 6 | l_linestatus, 7 | sum(l_quantity) as sum_qty, 8 | sum(l_extendedprice) as sum_base_price, 9 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 10 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 11 | avg(l_quantity) as avg_qty, 12 | avg(l_extendedprice) as avg_price, 13 | avg(l_discount) as avg_disc, 14 | count(*) as count_order 15 | from 16 | lineitem 17 | where 18 | l_shipdate <= date '1998-12-01' - interval '71' day 19 | group by 20 | l_returnflag, 21 | l_linestatus 22 | order by 23 | l_returnflag, 24 | l_linestatus; 25 | ---- 26 | A F 37474.00 37569624.64 35676192.0970 37101416.222424 25.354533 25419.231826 0.050866 1478 27 | N F 1041.00 1041301.07 999060.8980 1036450.802280 27.394736 27402.659736 0.042894 38 28 | N O 75823.00 76040604.76 72270477.1588 75140545.284463 25.564059 25637.425745 0.049824 2966 29 | R F 36511.00 36570841.24 34738472.8758 36169060.112193 25.059025 25100.096938 0.050027 1457 30 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q11.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | 
ps_partkey, 6 | sum(ps_supplycost * ps_availqty) as value 7 | from 8 | partsupp, 9 | supplier, 10 | nation 11 | where 12 | ps_suppkey = s_suppkey 13 | and s_nationkey = n_nationkey 14 | and n_name = 'GERMANY' 15 | group by 16 | ps_partkey having 17 | sum(ps_supplycost * ps_availqty) > ( 18 | select 19 | sum(ps_supplycost * ps_availqty) * 0.0001000000 20 | from 21 | partsupp, 22 | supplier, 23 | nation 24 | where 25 | ps_suppkey = s_suppkey 26 | and s_nationkey = n_nationkey 27 | and n_name = 'GERMANY' 28 | ) 29 | order by 30 | value desc; 31 | ---- 32 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q12.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | l_shipmode, 6 | sum(case 7 | when o_orderpriority = '1-URGENT' 8 | or o_orderpriority = '2-HIGH' 9 | then 1 10 | else 0 11 | end) as high_line_count, 12 | sum(case 13 | when o_orderpriority <> '1-URGENT' 14 | and o_orderpriority <> '2-HIGH' 15 | then 1 16 | else 0 17 | end) as low_line_count 18 | from 19 | orders, 20 | lineitem 21 | where 22 | o_orderkey = l_orderkey 23 | and l_shipmode in ('MAIL', 'SHIP') 24 | and l_commitdate < l_receiptdate 25 | and l_shipdate < l_commitdate 26 | and l_receiptdate >= date '1994-01-01' 27 | and l_receiptdate < date '1994-01-01' + interval '1' year 28 | group by 29 | l_shipmode 30 | order by 31 | l_shipmode; 32 | ---- 33 | MAIL 5 5 34 | SHIP 5 10 35 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q13.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | c_count, 6 | count(*) as custdist 7 | from 8 | ( 9 | select 10 | c_custkey, 11 | count(o_orderkey) 12 | from 13 | customer left outer join orders on 14 | c_custkey = o_custkey 15 | and o_comment not like 
'%special%requests%' 16 | group by 17 | c_custkey 18 | ) as c_orders (c_custkey, c_count) 19 | group by 20 | c_count 21 | order by 22 | custdist desc, 23 | c_count desc; 24 | ---- 25 | 0 50 26 | 16 8 27 | 17 7 28 | 20 6 29 | 13 6 30 | 12 6 31 | 9 6 32 | 23 5 33 | 14 5 34 | 10 5 35 | 21 4 36 | 18 4 37 | 11 4 38 | 8 4 39 | 7 4 40 | 26 3 41 | 22 3 42 | 6 3 43 | 5 3 44 | 4 3 45 | 29 2 46 | 24 2 47 | 19 2 48 | 15 2 49 | 28 1 50 | 25 1 51 | 3 1 52 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q14.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | 100.00 * sum(case 6 | when p_type like 'PROMO%' 7 | then l_extendedprice * (1 - l_discount) 8 | else 0 9 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 10 | from 11 | lineitem, 12 | part 13 | where 14 | l_partkey = p_partkey 15 | and l_shipdate >= date '1995-09-01' 16 | and l_shipdate < date '1995-09-01' + interval '1' month; 17 | ---- 18 | 15.23021261159725 19 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q15.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | statement ok 4 | create view revenue0 (supplier_no, total_revenue) as 5 | select 6 | l_suppkey, 7 | sum(l_extendedprice * (1 - l_discount)) 8 | from 9 | lineitem 10 | where 11 | l_shipdate >= date '1996-01-01' 12 | and l_shipdate < date '1996-01-01' + interval '3' month 13 | group by 14 | l_suppkey; 15 | 16 | query 17 | select 18 | s_suppkey, 19 | s_name, 20 | s_address, 21 | s_phone, 22 | total_revenue 23 | from 24 | supplier, 25 | revenue0 26 | where 27 | s_suppkey = supplier_no 28 | and total_revenue = ( 29 | select 30 | max(total_revenue) 31 | from 32 | revenue0 33 | ) 34 | order by 35 | s_suppkey; 36 | ---- 37 | 10 Supplier#000000010 Saygah3gYWMp72i 
PY 34-852-489-8585 797313.3838 38 | 39 | statement ok 40 | drop view revenue0; 41 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q16.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | p_brand, 6 | p_type, 7 | p_size, 8 | count(distinct ps_suppkey) as supplier_cnt 9 | from 10 | partsupp, 11 | part 12 | where 13 | p_partkey = ps_partkey 14 | and p_brand <> 'Brand#45' 15 | and p_type not like 'MEDIUM POLISHED%' 16 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9) 17 | and ps_suppkey not in ( 18 | select 19 | s_suppkey 20 | from 21 | supplier 22 | where 23 | s_comment like '%Customer%Complaints%' 24 | ) 25 | group by 26 | p_brand, 27 | p_type, 28 | p_size 29 | order by 30 | supplier_cnt desc, 31 | p_brand, 32 | p_type, 33 | p_size; 34 | ---- 35 | Brand#11 PROMO ANODIZED TIN 45 4 36 | Brand#11 SMALL PLATED COPPER 45 4 37 | Brand#11 STANDARD POLISHED TIN 45 4 38 | Brand#13 MEDIUM ANODIZED STEEL 36 4 39 | Brand#13 SMALL BRUSHED NICKEL 19 4 40 | Brand#14 SMALL ANODIZED NICKEL 45 4 41 | Brand#15 LARGE ANODIZED BRASS 45 4 42 | Brand#21 LARGE BURNISHED COPPER 19 4 43 | Brand#23 ECONOMY BRUSHED COPPER 9 4 44 | Brand#24 MEDIUM PLATED STEEL 19 4 45 | Brand#25 MEDIUM PLATED BRASS 45 4 46 | Brand#25 SMALL BURNISHED COPPER 3 4 47 | Brand#31 ECONOMY PLATED STEEL 23 4 48 | Brand#31 PROMO POLISHED TIN 23 4 49 | Brand#32 MEDIUM BURNISHED BRASS 49 4 50 | Brand#33 LARGE BRUSHED TIN 36 4 51 | Brand#33 SMALL BURNISHED NICKEL 3 4 52 | Brand#34 LARGE PLATED BRASS 45 4 53 | Brand#34 MEDIUM BRUSHED COPPER 9 4 54 | Brand#34 SMALL PLATED BRASS 14 4 55 | Brand#35 STANDARD ANODIZED STEEL 23 4 56 | Brand#43 MEDIUM ANODIZED BRASS 14 4 57 | Brand#43 PROMO POLISHED BRASS 19 4 58 | Brand#43 SMALL BRUSHED NICKEL 9 4 59 | Brand#44 SMALL PLATED COPPER 19 4 60 | Brand#51 ECONOMY POLISHED STEEL 49 4 61 | Brand#52 MEDIUM BURNISHED TIN 45 4 62 | 
Brand#52 SMALL BURNISHED NICKEL 14 4 63 | Brand#53 LARGE BURNISHED NICKEL 23 4 64 | Brand#53 MEDIUM BRUSHED COPPER 3 4 65 | Brand#53 STANDARD PLATED STEEL 45 4 66 | Brand#54 ECONOMY ANODIZED BRASS 9 4 67 | Brand#55 STANDARD ANODIZED BRASS 36 4 68 | Brand#55 STANDARD BRUSHED COPPER 3 4 69 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q17.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | sum(l_extendedprice) / 7.0 as avg_yearly 6 | from 7 | lineitem, 8 | part 9 | where 10 | p_partkey = l_partkey 11 | and p_brand = 'Brand#53' -- original: Brand#23 12 | and p_container = 'MED BOX' 13 | and l_quantity < ( 14 | select 15 | 0.2 * avg(l_quantity) 16 | from 17 | lineitem 18 | where 19 | l_partkey = p_partkey 20 | ); 21 | ---- 22 | 863.2285714285715 23 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q18.slt.disabled: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | c_name, 6 | c_custkey, 7 | o_orderkey, 8 | o_orderdate, 9 | o_totalprice, 10 | sum(l_quantity) 11 | from 12 | customer, 13 | orders, 14 | lineitem 15 | where 16 | o_orderkey in ( 17 | select 18 | l_orderkey 19 | from 20 | lineitem 21 | group by 22 | l_orderkey having 23 | sum(l_quantity) > 250 -- original: 300 24 | ) 25 | and c_custkey = o_custkey 26 | and o_orderkey = l_orderkey 27 | group by 28 | c_name, 29 | c_custkey, 30 | o_orderkey, 31 | o_orderdate, 32 | o_totalprice 33 | order by 34 | o_totalprice desc, 35 | o_orderdate 36 | limit 100; 37 | ---- 38 | Customer#000000070 70 2567 1998-02-27 263411.29 266.00 39 | Customer#000000010 10 4421 1997-04-04 258779.02 255.00 40 | Customer#000000082 82 3460 1995-10-03 245976.74 254.00 41 | Customer#000000068 68 2208 1995-05-01 245388.06 256.00 42 | 
-------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q19.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | sum(l_extendedprice* (1 - l_discount)) as revenue 6 | from 7 | lineitem, 8 | part 9 | where 10 | ( 11 | p_partkey = l_partkey 12 | and p_brand = 'Brand#12' 13 | and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 14 | and l_quantity >= 1 and l_quantity <= 1 + 10 15 | and p_size between 1 and 5 16 | and l_shipmode in ('AIR', 'AIR REG') 17 | and l_shipinstruct = 'DELIVER IN PERSON' 18 | ) 19 | or 20 | ( 21 | p_partkey = l_partkey 22 | and p_brand = 'Brand#23' 23 | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 24 | and l_quantity >= 10 and l_quantity <= 10 + 10 25 | and p_size between 1 and 10 26 | and l_shipmode in ('AIR', 'AIR REG') 27 | and l_shipinstruct = 'DELIVER IN PERSON' 28 | ) 29 | or 30 | ( 31 | p_partkey = l_partkey 32 | and p_brand = 'Brand#33' 33 | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 34 | and l_quantity >= 20 and l_quantity <= 20 + 10 35 | and p_size between 1 and 15 36 | and l_shipmode in ('AIR', 'AIR REG') 37 | and l_shipinstruct = 'DELIVER IN PERSON' 38 | ); 39 | ---- 40 | 24521.1300 41 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q2.slt.disabled: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | s_acctbal, 6 | s_name, 7 | n_name, 8 | p_partkey, 9 | p_mfgr, 10 | s_address, 11 | s_phone, 12 | s_comment 13 | from 14 | part, 15 | supplier, 16 | partsupp, 17 | nation, 18 | region 19 | where 20 | p_partkey = ps_partkey 21 | and s_suppkey = ps_suppkey 22 | and p_size = 1 23 | and p_type like '%TIN' 24 | and s_nationkey = n_nationkey 25 | and n_regionkey = r_regionkey 26 | and r_name = 
'AFRICA' 27 | and ps_supplycost = ( 28 | select 29 | min(ps_supplycost) 30 | from 31 | partsupp, 32 | supplier, 33 | nation, 34 | region 35 | where 36 | p_partkey = ps_partkey 37 | and s_suppkey = ps_suppkey 38 | and s_nationkey = n_nationkey 39 | and n_regionkey = r_regionkey 40 | and r_name = 'AFRICA' 41 | ) 42 | order by 43 | s_acctbal desc, 44 | n_name, 45 | s_name, 46 | p_partkey; 47 | ---- 48 | 1365.79 Supplier#000000006 KENYA 154 Manufacturer#1 tQxuVm7s7CnK 24-696-997-4969 final accounts. regular dolphins use against the furiously ironic decoys. 49 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q20.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | s_name, 6 | s_address 7 | from 8 | supplier, 9 | nation 10 | where 11 | s_suppkey in ( 12 | select 13 | ps_suppkey 14 | from 15 | partsupp 16 | where 17 | ps_partkey in ( 18 | select 19 | p_partkey 20 | from 21 | part 22 | where 23 | p_name like 'indian%' 24 | ) 25 | and ps_availqty > ( 26 | select 27 | 0.5 * sum(l_quantity) 28 | from 29 | lineitem 30 | where 31 | l_partkey = ps_partkey 32 | and l_suppkey = ps_suppkey 33 | and l_shipdate >= date '1996-01-01' 34 | and l_shipdate < date '1996-01-01' + interval '1' year 35 | ) 36 | ) 37 | and s_nationkey = n_nationkey 38 | and n_name = 'IRAQ' 39 | order by 40 | s_name; 41 | ---- 42 | Supplier#000000005 Gcdm2rJRzl5qlTVzc 43 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q21.slt.disabled: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | s_name, 6 | count(*) as numwait 7 | from 8 | supplier, 9 | lineitem l1, 10 | orders, 11 | nation 12 | where 13 | s_suppkey = l1.l_suppkey 14 | and o_orderkey = l1.l_orderkey 15 | and o_orderstatus = 'F' 16 | and 
l1.l_receiptdate > l1.l_commitdate 17 | and exists ( 18 | select 19 | * 20 | from 21 | lineitem l2 22 | where 23 | l2.l_orderkey = l1.l_orderkey 24 | and l2.l_suppkey <> l1.l_suppkey 25 | ) 26 | and not exists ( 27 | select 28 | * 29 | from 30 | lineitem l3 31 | where 32 | l3.l_orderkey = l1.l_orderkey 33 | and l3.l_suppkey <> l1.l_suppkey 34 | and l3.l_receiptdate > l3.l_commitdate 35 | ) 36 | and s_nationkey = n_nationkey 37 | and n_name = 'SAUDI ARABIA' 38 | group by 39 | s_name 40 | order by 41 | numwait desc, 42 | s_name 43 | limit 100; 44 | ---- 45 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q22.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | cntrycode, 6 | count(*) as numcust, 7 | sum(c_acctbal) as totacctbal 8 | from 9 | ( 10 | select 11 | substring(c_phone from 1 for 2) as cntrycode, 12 | c_acctbal 13 | from 14 | customer 15 | where 16 | substring(c_phone from 1 for 2) in 17 | ('13', '31', '23', '29', '30', '18', '17') 18 | and c_acctbal > ( 19 | select 20 | avg(c_acctbal) 21 | from 22 | customer 23 | where 24 | c_acctbal > 0.00 25 | and substring(c_phone from 1 for 2) in 26 | ('13', '31', '23', '29', '30', '18', '17') 27 | ) 28 | and not exists ( 29 | select 30 | * 31 | from 32 | orders 33 | where 34 | o_custkey = c_custkey 35 | ) 36 | ) as custsale 37 | group by 38 | cntrycode 39 | order by 40 | cntrycode; 41 | ---- 42 | 13 1 5679.84 43 | 17 1 9127.27 44 | 18 2 14647.99 45 | 23 1 9255.67 46 | 29 2 17195.08 47 | 30 1 7638.57 48 | 31 1 9331.13 49 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q3.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | l_orderkey, 6 | sum(l_extendedprice * (1 - l_discount)) as revenue, 7 | o_orderdate, 8 | 
o_shippriority 9 | from 10 | customer, 11 | orders, 12 | lineitem 13 | where 14 | c_mktsegment = 'BUILDING' 15 | and c_custkey = o_custkey 16 | and l_orderkey = o_orderkey 17 | and o_orderdate < date '1995-03-15' 18 | and l_shipdate > date '1995-03-15' 19 | group by 20 | l_orderkey, 21 | o_orderdate, 22 | o_shippriority 23 | order by 24 | revenue desc, 25 | o_orderdate 26 | limit 10; 27 | ---- 28 | 1637 164224.9253 1995-02-08 0 29 | 5191 49378.3094 1994-12-11 0 30 | 742 43728.0480 1994-12-23 0 31 | 3492 43716.0724 1994-11-24 0 32 | 2883 36666.9612 1995-01-23 0 33 | 998 11785.5486 1994-11-26 0 34 | 3430 4726.6775 1994-12-12 0 35 | 4423 3055.9365 1995-02-17 0 36 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q4.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | o_orderpriority, 6 | count(*) as order_count 7 | from 8 | orders 9 | where 10 | o_orderdate >= date '1993-07-01' 11 | and o_orderdate < date '1993-07-01' + interval '3' month 12 | and exists ( 13 | select 14 | * 15 | from 16 | lineitem 17 | where 18 | l_orderkey = o_orderkey 19 | and l_commitdate < l_receiptdate 20 | ) 21 | group by 22 | o_orderpriority 23 | order by 24 | o_orderpriority; 25 | ---- 26 | 1-URGENT 9 27 | 2-HIGH 7 28 | 3-MEDIUM 9 29 | 4-NOT SPECIFIED 8 30 | 5-LOW 12 31 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q5.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | n_name, 6 | sum(l_extendedprice * (1 - l_discount)) as revenue 7 | from 8 | customer, 9 | orders, 10 | lineitem, 11 | supplier, 12 | nation, 13 | region 14 | where 15 | c_custkey = o_custkey 16 | and l_orderkey = o_orderkey 17 | and l_suppkey = s_suppkey 18 | and c_nationkey = s_nationkey 19 | and s_nationkey = 
n_nationkey 20 | and n_regionkey = r_regionkey 21 | and r_name = 'AFRICA' 22 | and o_orderdate >= date '1994-01-01' 23 | and o_orderdate < date '1994-01-01' + interval '1' year 24 | group by 25 | n_name 26 | order by 27 | revenue desc; 28 | ---- 29 | MOROCCO 220457.0142 30 | ETHIOPIA 115183.8546 31 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q6.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | sum(l_extendedprice * l_discount) as revenue 6 | from 7 | lineitem 8 | where 9 | l_shipdate >= date '1994-01-01' 10 | and l_shipdate < date '1994-01-01' + interval '1' year 11 | and l_discount between 0.08 - 0.01 and 0.08 + 0.01 12 | and l_quantity < 24; 13 | ---- 14 | 90927.6243 15 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q7.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | supp_nation, 6 | cust_nation, 7 | l_year, 8 | sum(volume) as revenue 9 | from 10 | ( 11 | select 12 | n1.n_name as supp_nation, 13 | n2.n_name as cust_nation, 14 | extract(year from l_shipdate) as l_year, 15 | l_extendedprice * (1 - l_discount) as volume 16 | from 17 | supplier, 18 | lineitem, 19 | orders, 20 | customer, 21 | nation n1, 22 | nation n2 23 | where 24 | s_suppkey = l_suppkey 25 | and o_orderkey = l_orderkey 26 | and c_custkey = o_custkey 27 | and s_nationkey = n1.n_nationkey 28 | and c_nationkey = n2.n_nationkey 29 | and ( 30 | (n1.n_name = 'UNITED STATES' and n2.n_name = 'CHINA') 31 | or (n1.n_name = 'CHINA' and n2.n_name = 'UNITED STATES') 32 | ) 33 | and l_shipdate between date '1995-01-01' and date '1996-12-31' 34 | ) as shipping 35 | group by 36 | supp_nation, 37 | cust_nation, 38 | l_year 39 | order by 40 | supp_nation, 41 | cust_nation, 42 | l_year; 43 | 
---- 44 | UNITED STATES CHINA 1995.0 130212.3261 45 | UNITED STATES CHINA 1996.0 195468.6891 46 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q8.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | o_year, 6 | sum(case 7 | when nation = 'IRAQ' then volume 8 | else 0 9 | end) / sum(volume) as mkt_share 10 | from 11 | ( 12 | select 13 | extract(year from o_orderdate) as o_year, 14 | l_extendedprice * (1 - l_discount) as volume, 15 | n2.n_name as nation 16 | from 17 | part, 18 | supplier, 19 | lineitem, 20 | orders, 21 | customer, 22 | nation n1, 23 | nation n2, 24 | region 25 | where 26 | p_partkey = l_partkey 27 | and s_suppkey = l_suppkey 28 | and l_orderkey = o_orderkey 29 | and o_custkey = c_custkey 30 | and c_nationkey = n1.n_nationkey 31 | and n1.n_regionkey = r_regionkey 32 | and r_name = 'AMERICA' 33 | and s_nationkey = n2.n_nationkey 34 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 35 | and p_type = 'ECONOMY ANODIZED STEEL' 36 | ) as all_nations 37 | group by 38 | o_year 39 | order by 40 | o_year; 41 | ---- 42 | 1995.0 1.00000000 43 | 1996.0 0.32989690 44 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q9.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | nation, 6 | o_year, 7 | sum(amount) as sum_profit 8 | from 9 | ( 10 | select 11 | n_name as nation, 12 | extract(year from o_orderdate) as o_year, 13 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 14 | from 15 | part, 16 | supplier, 17 | lineitem, 18 | partsupp, 19 | orders, 20 | nation 21 | where 22 | s_suppkey = l_suppkey 23 | and ps_suppkey = l_suppkey 24 | and ps_partkey = l_partkey 25 | and p_partkey = l_partkey 26 | and o_orderkey = 
l_orderkey 27 | and s_nationkey = n_nationkey 28 | and p_name like '%green%' 29 | ) as profit 30 | group by 31 | nation, 32 | o_year 33 | order by 34 | nation, 35 | o_year desc; 36 | ---- 37 | ARGENTINA 1998.0 17779.0697 38 | ARGENTINA 1997.0 13943.9538 39 | ARGENTINA 1996.0 7641.4227 40 | ARGENTINA 1995.0 20892.7525 41 | ARGENTINA 1994.0 15088.3526 42 | ARGENTINA 1993.0 17586.3446 43 | ARGENTINA 1992.0 28732.4615 44 | ETHIOPIA 1998.0 28217.1600 45 | ETHIOPIA 1996.0 33970.6500 46 | ETHIOPIA 1995.0 37720.3500 47 | ETHIOPIA 1994.0 37251.0100 48 | ETHIOPIA 1993.0 23782.6100 49 | IRAN 1997.0 23590.0080 50 | IRAN 1996.0 7428.2325 51 | IRAN 1995.0 21000.9965 52 | IRAN 1994.0 29408.1300 53 | IRAN 1993.0 49876.4150 54 | IRAN 1992.0 52064.2400 55 | IRAQ 1998.0 11619.9604 56 | IRAQ 1997.0 47910.2460 57 | IRAQ 1996.0 18459.5675 58 | IRAQ 1995.0 32782.3701 59 | IRAQ 1994.0 9041.2317 60 | IRAQ 1993.0 30687.2625 61 | IRAQ 1992.0 29098.2557 62 | KENYA 1998.0 33148.3345 63 | KENYA 1997.0 54355.0165 64 | KENYA 1996.0 53607.4854 65 | KENYA 1995.0 85354.8738 66 | KENYA 1994.0 102904.2511 67 | KENYA 1993.0 109310.8084 68 | KENYA 1992.0 138534.1210 69 | MOROCCO 1998.0 157058.2328 70 | MOROCCO 1997.0 88669.9610 71 | MOROCCO 1996.0 236833.6672 72 | MOROCCO 1995.0 381575.8668 73 | MOROCCO 1994.0 243523.4336 74 | MOROCCO 1993.0 232196.7803 75 | MOROCCO 1992.0 347434.1452 76 | PERU 1998.0 101109.0196 77 | PERU 1997.0 58073.0866 78 | PERU 1996.0 30360.5218 79 | PERU 1995.0 138451.7800 80 | PERU 1994.0 55023.0632 81 | PERU 1993.0 110409.0863 82 | PERU 1992.0 70946.1916 83 | UNITED KINGDOM 1998.0 139685.0440 84 | UNITED KINGDOM 1997.0 183502.0498 85 | UNITED KINGDOM 1996.0 374085.2884 86 | UNITED KINGDOM 1995.0 548356.7984 87 | UNITED KINGDOM 1994.0 266982.7680 88 | UNITED KINGDOM 1993.0 717309.4640 89 | UNITED KINGDOM 1992.0 79540.6016 90 | UNITED STATES 1998.0 32847.9600 91 | UNITED STATES 1997.0 30849.5000 92 | UNITED STATES 1996.0 56125.4600 93 | UNITED STATES 1995.0 15961.7977 94 | UNITED 
STATES 1994.0 31671.2000 95 | UNITED STATES 1993.0 55057.4690 96 | UNITED STATES 1992.0 51970.2300 97 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-agg-nulls.slt: -------------------------------------------------------------------------------- 1 | include _basic_tables.slt.part 2 | 3 | # This query has NULL values from the subquery agg. It won't work without the 4 | # outer join fix. 5 | # It also has an out-of-order extern column [#1] 6 | query 7 | select 8 | v1, 9 | v2, 10 | ( 11 | select avg(v4) 12 | from t2 13 | where v4 = v2 14 | ) as avg_v4 15 | from t1 order by v1; 16 | ---- 17 | 1 100 NULL 18 | 2 200 200.0 19 | 2 250 250.0 20 | 3 300 300.0 21 | 3 300 300.0 22 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-count-star.slt: -------------------------------------------------------------------------------- 1 | include _basic_tables.slt.part 2 | 3 | # This query uses a count(*) agg function, with nulls. Nulls should be 4 | # transformed from NULL to 0 when they come from count(*). 5 | # It won't work without the outer join fix + a special case on count(*). 
6 | # It also has an out-of-order extern column [#1] 7 | query 8 | select 9 | v1, 10 | v2, 11 | ( 12 | select count(*) 13 | from t2 14 | where v4 = v2 15 | ) as avg_v4 16 | from t1 order by v1; 17 | ---- 18 | 1 100 0 19 | 2 200 1 20 | 2 250 1 21 | 3 300 1 22 | 3 300 1 23 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-dup.slt: -------------------------------------------------------------------------------- 1 | include _basic_tables.slt.part 2 | 3 | query 4 | select * from t1 where (select sum(v4) from t2 where v3 = v1) > 100; 5 | ---- 6 | 2 200 7 | 2 250 8 | 3 300 9 | 3 300 10 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-exists-2.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT 5 | c.c_custkey, 6 | c.c_name 7 | FROM 8 | customer c 9 | WHERE 10 | EXISTS ( 11 | SELECT 1 12 | FROM orders o 13 | WHERE o.o_custkey = c.c_custkey 14 | AND o.o_orderstatus = 'O' 15 | AND o.o_orderdate > '1998-08-01' 16 | ) 17 | AND NOT EXISTS ( 18 | SELECT 1 19 | FROM orders o 20 | JOIN lineitem l ON o.o_orderkey = l.l_orderkey 21 | WHERE o.o_custkey = c.c_custkey 22 | AND o.o_orderstatus = 'R' 23 | AND o.o_orderdate > '1998-08-01' 24 | AND o.o_totalprice > 5000 25 | ); 26 | ---- 27 | 88 Customer#000000088 28 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-exists-uncor.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c_name 5 | FROM customer c 6 | WHERE c_nationkey IN ( 7 | SELECT n_nationkey 8 | FROM nation 9 | WHERE n_name = 'GERMANY' 10 | ); 11 | ---- 12 | Customer#000000062 13 | Customer#000000071 14 | Customer#000000093 15 | Customer#000000119 16 | Customer#000000129 17 | Customer#000000136 18 | 
-------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-exists.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT 5 | c_custkey, 6 | c_name 7 | FROM 8 | customer c 9 | WHERE 10 | EXISTS ( 11 | SELECT 1 12 | FROM orders o 13 | WHERE o.o_custkey = c.c_custkey 14 | AND o.o_orderstatus = 'O' 15 | AND o.o_orderdate > '1998-08-01' 16 | ); 17 | ---- 18 | 88 Customer#000000088 19 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-extern-out-of-order.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | # A query with a correlated subquery that retrieves columns out of order 4 | # i.e. the extern columns are not of the format [#0, #1, ...] 5 | # This query has extern columns [#1] 6 | query 7 | select 8 | l_orderkey, 9 | l_partkey, 10 | l_extendedprice, 11 | ( 12 | select avg(p_size) 13 | from part 14 | where p_partkey = l_partkey 15 | ) as avg_extendedprice 16 | from lineitem 17 | where l_extendedprice > 55000; 18 | ---- 19 | 1121 200 55010.00 22.0 20 | 4931 200 55010.00 22.0 21 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-in-exists.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c.c_custkey, c.c_name 5 | FROM customer c 6 | WHERE c.c_custkey IN ( 7 | SELECT o.o_custkey 8 | FROM orders o 9 | WHERE o.o_custkey = c.c_custkey AND o.o_totalprice > 250000 10 | ) 11 | AND EXISTS ( 12 | SELECT 1 13 | FROM orders o 14 | WHERE o.o_custkey = c.c_custkey 15 | AND o.o_orderstatus = 'O' 16 | ) 17 | order by c.c_custkey; 18 | ---- 19 | 10 Customer#000000010 20 | 70 Customer#000000070 21 | 
-------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-in-uncor.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c.c_custkey, c.c_name 5 | FROM customer c 6 | WHERE c.c_custkey IN ( 7 | SELECT o.o_custkey 8 | FROM orders o 9 | WHERE o.o_totalprice > 250000 10 | ) order by c.c_custkey; 11 | ---- 12 | 10 Customer#000000010 13 | 70 Customer#000000070 14 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-in.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c.c_custkey, c.c_name 5 | FROM customer c 6 | WHERE c.c_custkey IN ( 7 | SELECT o.o_custkey 8 | FROM orders o 9 | WHERE o.o_custkey = c.c_custkey AND o.o_totalprice > 250000 10 | ) 11 | ORDER BY c.c_custkey; 12 | ---- 13 | 10 Customer#000000010 14 | 70 Customer#000000070 15 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-not-in-uncor.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c.c_custkey, c.c_name 5 | FROM customer c 6 | WHERE c.c_custkey NOT IN ( 7 | SELECT o.o_custkey 8 | FROM orders o 9 | WHERE o.o_orderstatus = 'O' 10 | ) order by c.c_custkey; 11 | ---- 12 | 3 Customer#000000003 13 | 6 Customer#000000006 14 | 9 Customer#000000009 15 | 12 Customer#000000012 16 | 15 Customer#000000015 17 | 18 Customer#000000018 18 | 21 Customer#000000021 19 | 24 Customer#000000024 20 | 27 Customer#000000027 21 | 30 Customer#000000030 22 | 33 Customer#000000033 23 | 36 Customer#000000036 24 | 39 Customer#000000039 25 | 42 Customer#000000042 26 | 45 Customer#000000045 27 | 48 Customer#000000048 28 | 51 Customer#000000051 29 | 54 Customer#000000054 30 | 57 
Customer#000000057 31 | 60 Customer#000000060 32 | 63 Customer#000000063 33 | 66 Customer#000000066 34 | 69 Customer#000000069 35 | 72 Customer#000000072 36 | 75 Customer#000000075 37 | 78 Customer#000000078 38 | 81 Customer#000000081 39 | 84 Customer#000000084 40 | 87 Customer#000000087 41 | 90 Customer#000000090 42 | 93 Customer#000000093 43 | 96 Customer#000000096 44 | 99 Customer#000000099 45 | 102 Customer#000000102 46 | 105 Customer#000000105 47 | 108 Customer#000000108 48 | 111 Customer#000000111 49 | 114 Customer#000000114 50 | 117 Customer#000000117 51 | 120 Customer#000000120 52 | 123 Customer#000000123 53 | 126 Customer#000000126 54 | 129 Customer#000000129 55 | 132 Customer#000000132 56 | 135 Customer#000000135 57 | 138 Customer#000000138 58 | 141 Customer#000000141 59 | 144 Customer#000000144 60 | 147 Customer#000000147 61 | 150 Customer#000000150 62 | -------------------------------------------------------------------------------- /optd-sqllogictest/tests/harness.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use std::path::Path; 7 | 8 | use optd_sqllogictest::DatafusionDBMS; 9 | use sqllogictest::{harness::Failed, Runner}; 10 | use tokio::runtime::Runtime; 11 | 12 | // TODO: sqllogictest harness should support async new function 13 | 14 | fn main() { 15 | let paths = sqllogictest::harness::glob("slt/**/*.slt").expect("failed to find test files"); 16 | let mut tests = vec![]; 17 | 18 | for entry in paths { 19 | let path = entry.expect("failed to read glob entry"); 20 | tests.push(sqllogictest::harness::Trial::test( 21 | path.to_str().unwrap().to_string(), 22 | move || test(&path), 23 | )); 24 | } 25 | 26 | if tests.is_empty() { 27 | panic!("no test found for sqllogictest under: slt/**/*.slt"); 28 | } 29 | 30 | sqllogictest::harness::run(&sqllogictest::harness::Arguments::from_args(), tests).exit(); 31 | } 32 | 33 | fn build_runtime() -> Runtime { 34 | tokio::runtime::Builder::new_current_thread() 35 | .enable_all() 36 | .build() 37 | .unwrap() 38 | } 39 | 40 | fn test(filename: impl AsRef) -> Result<(), Failed> { 41 | build_runtime().block_on(async { 42 | // let mut tester = Runner::new(|| async { Ok(DatafusionDBMS::new_no_optd().await?) 
}); 43 | let mut tester = Runner::new(|| async { DatafusionDBMS::new().await }); 44 | tester.run_file_async(filename).await?; 45 | Ok(()) 46 | }) 47 | } 48 | -------------------------------------------------------------------------------- /optd-sqlplannertest/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-sqlplannertest" 3 | description = "sqlplannertest for optd" 4 | version = { workspace = true } 5 | edition = { workspace = true } 6 | homepage = { workspace = true } 7 | keywords = { workspace = true } 8 | license = { workspace = true } 9 | repository = { workspace = true } 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | clap = { version = "4.5.4", features = ["derive"] } 15 | anyhow = { version = "1", features = ["backtrace"] } 16 | sqlplannertest = "0.4.1" 17 | async-trait = "0.1" 18 | datafusion-optd-cli = { path = "../datafusion-optd-cli", version = "43.0.0" } 19 | optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" } 20 | datafusion = { version = "43.0.0", features = [ 21 | "avro", 22 | "crypto_expressions", 23 | "encoding_expressions", 24 | "regex_expressions", 25 | "unicode_expressions", 26 | "compression", 27 | ] } 28 | mimalloc = { version = "0.1", default-features = false } 29 | regex = "1.8" 30 | tokio = { version = "1.24", features = [ 31 | "macros", 32 | "rt", 33 | "rt-multi-thread", 34 | "sync", 35 | "parking_lot", 36 | ] } 37 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } 38 | optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } 39 | itertools = "0.13" 40 | lazy_static = "1.4.0" 41 | tracing-subscriber = { version = "0.3", features = ["env-filter"] } 42 | backtrace-on-stack-overflow = "0.3" 43 | 44 | [dev-dependencies] 45 | criterion = { version = "0.5.1", features = ["async_tokio"] } 46 
| serde_yaml = "0.9" 47 | 48 | [[test]] 49 | name = "planner_test" 50 | harness = false 51 | 52 | [[bench]] 53 | name = "planner_bench" 54 | harness = false 55 | -------------------------------------------------------------------------------- /optd-sqlplannertest/src/bench_helper.rs: -------------------------------------------------------------------------------- 1 | pub mod execution; 2 | pub mod planning; 3 | 4 | use std::future::Future; 5 | 6 | use crate::TestFlags; 7 | use anyhow::Result; 8 | use tokio::runtime::Runtime; 9 | 10 | pub use execution::ExecutionBenchRunner; 11 | pub use planning::PlanningBenchRunner; 12 | 13 | pub trait PlannerBenchRunner { 14 | /// Describes what the benchmark is evaluating. 15 | const BENCH_NAME: &str; 16 | /// Benchmark's input. 17 | type BenchInput; 18 | 19 | /// Setups the necessary environment for the benchmark based on the test case. 20 | /// Returns the input needed for the benchmark. 21 | fn setup( 22 | &mut self, 23 | test_case: &sqlplannertest::ParsedTestCase, 24 | ) -> impl std::future::Future> + Send; 25 | 26 | /// Runs the actual benchmark based on the test case and input. 
27 | fn bench( 28 | self, 29 | input: Self::BenchInput, 30 | test_case: &sqlplannertest::ParsedTestCase, 31 | flags: &TestFlags, 32 | ) -> impl std::future::Future> + Send; 33 | } 34 | 35 | /// Sync wrapper for [`PlannerBenchRunner::setup`] 36 | pub fn bench_setup( 37 | runtime: &Runtime, 38 | runner_fn: F, 39 | testcase: &sqlplannertest::ParsedTestCase, 40 | ) -> (R, R::BenchInput, TestFlags) 41 | where 42 | F: Fn() -> Ft + Send + Sync + 'static + Clone, 43 | Ft: Future> + Send, 44 | R: PlannerBenchRunner, 45 | { 46 | runtime.block_on(async { 47 | let mut runner = runner_fn().await.unwrap(); 48 | let (input, flags) = runner.setup(testcase).await.unwrap(); 49 | (runner, input, flags) 50 | }) 51 | } 52 | 53 | /// Sync wrapper for [`PlannerBenchRunner::bench`] 54 | pub fn bench_run( 55 | runtime: &Runtime, 56 | runner: R, 57 | input: R::BenchInput, 58 | testcase: &sqlplannertest::ParsedTestCase, 59 | flags: &TestFlags, 60 | ) where 61 | R: PlannerBenchRunner, 62 | { 63 | runtime.block_on(async { runner.bench(input, testcase, flags).await.unwrap() }); 64 | } 65 | -------------------------------------------------------------------------------- /optd-sqlplannertest/src/bench_helper/execution.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::{extract_flags, DatafusionDBMS, TestFlags}; 4 | use anyhow::Result; 5 | use datafusion::{execution::TaskContext, physical_plan::ExecutionPlan}; 6 | 7 | use super::PlannerBenchRunner; 8 | 9 | /// A benchmark runner for evaluating execution time of optimized plan. 10 | pub struct ExecutionBenchRunner { 11 | pub dbms: DatafusionDBMS, 12 | /// DDLs and DMLs to populate the tables. 
13 | pub populate_sql: String, 14 | } 15 | 16 | impl ExecutionBenchRunner { 17 | pub async fn new(populate_sql: String) -> Result { 18 | Ok(ExecutionBenchRunner { 19 | dbms: DatafusionDBMS::new().await?, 20 | populate_sql, 21 | }) 22 | } 23 | } 24 | 25 | /// With physical execution plan as input, 26 | /// measures the time it takes to execute the plan generated by the optimizer. 27 | impl PlannerBenchRunner for ExecutionBenchRunner { 28 | const BENCH_NAME: &str = "execution"; 29 | type BenchInput = Vec<(Arc, Arc)>; 30 | async fn setup( 31 | &mut self, 32 | test_case: &sqlplannertest::ParsedTestCase, 33 | ) -> Result<(Self::BenchInput, TestFlags)> { 34 | for sql in &test_case.before_sql { 35 | self.dbms.execute(sql, &TestFlags::default()).await?; 36 | } 37 | 38 | // Populate the existing tables. 39 | for sql in self.populate_sql.split(";\n") { 40 | self.dbms.execute(sql, &TestFlags::default()).await?; 41 | } 42 | 43 | let bench_task = test_case 44 | .tasks 45 | .iter() 46 | .find(|x| x.starts_with("bench")) 47 | .unwrap(); 48 | let flags = extract_flags(bench_task)?; 49 | 50 | self.dbms.setup(&flags).await?; 51 | let statements = self.dbms.parse_sql(&test_case.sql).await?; 52 | 53 | let mut physical_plans = Vec::new(); 54 | for statement in statements { 55 | physical_plans.push(self.dbms.create_physical_plan(statement, &flags).await?); 56 | } 57 | 58 | Ok((physical_plans, flags)) 59 | } 60 | async fn bench( 61 | self, 62 | input: Self::BenchInput, 63 | _test_case: &sqlplannertest::ParsedTestCase, 64 | _flags: &TestFlags, 65 | ) -> Result<()> { 66 | for (physical_plan, task_ctx) in input { 67 | self.dbms.execute_physical(physical_plan, task_ctx).await?; 68 | } 69 | Ok(()) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /optd-sqlplannertest/src/bench_helper/planning.rs: -------------------------------------------------------------------------------- 1 | use std::collections::VecDeque; 2 | 3 | use 
crate::{extract_flags, DatafusionDBMS, TestFlags}; 4 | use anyhow::Result; 5 | use datafusion::sql::parser::Statement; 6 | 7 | use super::PlannerBenchRunner; 8 | 9 | /// A benchmark runner for evaluating optimizer planning time. 10 | pub struct PlanningBenchRunner(DatafusionDBMS); 11 | 12 | impl PlanningBenchRunner { 13 | pub async fn new() -> Result { 14 | Ok(PlanningBenchRunner(DatafusionDBMS::new().await?)) 15 | } 16 | } 17 | 18 | /// With parsed statements as input, 19 | /// measures the time it takes to generate datafusion physical plans. 20 | impl PlannerBenchRunner for PlanningBenchRunner { 21 | const BENCH_NAME: &str = "planning"; 22 | type BenchInput = VecDeque; 23 | async fn setup( 24 | &mut self, 25 | test_case: &sqlplannertest::ParsedTestCase, 26 | ) -> Result<(Self::BenchInput, TestFlags)> { 27 | for sql in &test_case.before_sql { 28 | self.0.execute(sql, &TestFlags::default()).await?; 29 | } 30 | let bench_task = test_case 31 | .tasks 32 | .iter() 33 | .find(|x| x.starts_with("bench")) 34 | .unwrap(); 35 | let flags = extract_flags(bench_task)?; 36 | self.0.setup(&flags).await?; 37 | let statements = self.0.parse_sql(&test_case.sql).await?; 38 | 39 | Ok((statements, flags)) 40 | } 41 | async fn bench( 42 | self, 43 | input: Self::BenchInput, 44 | _test_case: &sqlplannertest::ParsedTestCase, 45 | flags: &TestFlags, 46 | ) -> Result<()> { 47 | for stmt in input { 48 | self.0.create_physical_plan(stmt, flags).await?; 49 | } 50 | Ok(()) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /optd-sqlplannertest/src/bin/planner_test_apply.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use std::path::Path; 7 | 8 | use anyhow::Result; 9 | use clap::Parser; 10 | use sqlplannertest::PlannerTestApplyOptions; 11 | 12 | #[derive(Parser)] 13 | #[command(version, about, long_about = None)] 14 | struct Cli { 15 | /// Optional list of test modules or test files to apply the test; if empty, apply all tests 16 | selections: Vec, 17 | /// Use the advanced cost model 18 | #[clap(long)] 19 | enable_advanced_cost_model: bool, 20 | /// Execute tests in serial 21 | #[clap(long)] 22 | serial: bool, 23 | } 24 | 25 | #[tokio::main] 26 | async fn main() -> Result<()> { 27 | use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter}; 28 | 29 | tracing_subscriber::registry() 30 | .with(fmt::layer()) 31 | .with( 32 | EnvFilter::builder() 33 | .with_default_directive(LevelFilter::INFO.into()) 34 | .from_env_lossy(), 35 | ) 36 | .init(); 37 | 38 | unsafe { backtrace_on_stack_overflow::enable() }; 39 | 40 | let cli = Cli::parse(); 41 | 42 | let enable_advanced_cost_model = cli.enable_advanced_cost_model; 43 | let opts = PlannerTestApplyOptions { 44 | serial: cli.serial, 45 | selections: cli.selections, 46 | }; 47 | 48 | sqlplannertest::planner_test_apply_with_options( 49 | Path::new(env!("CARGO_MANIFEST_DIR")).join("tests"), 50 | move || async move { 51 | if enable_advanced_cost_model { 52 | optd_sqlplannertest::DatafusionDBMS::new_advanced_cost().await 53 | } else { 54 | optd_sqlplannertest::DatafusionDBMS::new().await 55 | } 56 | }, 57 | opts, 58 | ) 59 | .await?; 60 | 61 | Ok(()) 62 | } 63 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/basic_nodes.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | 7 | /* 8 | 3 9 | 3 10 | 
*/ 11 | 12 | -- Test limit nodes 13 | select * from t1 limit 1; 14 | select * from t1 limit 3; 15 | select * from t1 limit 5; 16 | 17 | /* 18 | LogicalLimit { skip: 0(i64), fetch: 1(i64) } 19 | └── LogicalProjection { exprs: [ #0, #1 ] } 20 | └── LogicalScan { table: t1 } 21 | PhysicalLimit { skip: 0(i64), fetch: 1(i64) } 22 | └── PhysicalScan { table: t1 } 23 | 0 0 24 | 0 0 25 | 1 1 26 | 2 2 27 | 0 0 28 | 1 1 29 | 2 2 30 | */ 31 | 32 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/basic_nodes.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1 limit 1; 10 | select * from t1 limit 3; 11 | select * from t1 limit 5; 12 | desc: Test limit nodes 13 | tasks: 14 | - explain:logical_optd,physical_optd 15 | - execute -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/constant_predicate.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | insert into t1 values (0, 0), (1, 1), (2, 2); 4 | 5 | /* 6 | 3 7 | */ 8 | 9 | -- Test whether the optimizer handles integer equality predicates correctly. 10 | select * from t1 where t1v1 = 0; 11 | 12 | /* 13 | 0 0 14 | */ 15 | 16 | -- Test whether the optimizer handles multiple integer equality predicates correctly. 17 | select * from t1 where t1v1 = 0 and t1v2 = 1; 18 | 19 | /* 20 | 21 | */ 22 | 23 | -- Test whether the optimizer handles multiple integer inequality predicates correctly. 
24 | select * from t1 where t1v1 = 0 and t1v2 != 1; 25 | 26 | /* 27 | 0 0 28 | */ 29 | 30 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/constant_predicate.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | insert into t1 values (0, 0), (1, 1), (2, 2); 4 | tasks: 5 | - execute 6 | - sql: | 7 | select * from t1 where t1v1 = 0; 8 | desc: Test whether the optimizer handles integer equality predicates correctly. 9 | tasks: 10 | - execute 11 | - sql: | 12 | select * from t1 where t1v1 = 0 and t1v2 = 1; 13 | desc: Test whether the optimizer handles multiple integer equality predicates correctly. 14 | tasks: 15 | - execute 16 | - sql: | 17 | select * from t1 where t1v1 = 0 and t1v2 != 1; 18 | desc: Test whether the optimizer handles multiple integer inequality predicates correctly. 19 | tasks: 20 | - execute 21 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/cross_product.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int); 3 | create table t2(t2v1 int); 4 | insert into t1 values (0), (1), (2); 5 | insert into t2 values (0), (1), (2); 6 | 7 | /* 8 | 3 9 | 3 10 | */ 11 | 12 | -- Test optimizer logical for a cross product. 
13 | select * from t1, t2; 14 | 15 | /* 16 | LogicalProjection { exprs: [ #0, #1 ] } 17 | └── LogicalJoin { join_type: Inner, cond: true } 18 | ├── LogicalScan { table: t1 } 19 | └── LogicalScan { table: t2 } 20 | PhysicalNestedLoopJoin { join_type: Inner, cond: true } 21 | ├── PhysicalScan { table: t1 } 22 | └── PhysicalScan { table: t2 } 23 | 0 0 24 | 0 1 25 | 0 2 26 | 1 0 27 | 1 1 28 | 1 2 29 | 2 0 30 | 2 1 31 | 2 2 32 | */ 33 | 34 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/cross_product.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int); 3 | create table t2(t2v1 int); 4 | insert into t1 values (0), (1), (2); 5 | insert into t2 values (0), (1), (2); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1, t2; 10 | desc: Test optimizer logical for a cross product. 11 | tasks: 12 | - explain:logical_optd,physical_optd 13 | - execute 14 | 15 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_duplicated_expr.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(v1 int, v2 int); 3 | insert into t1 values (0, 0), (1, 1), (5, 2), (2, 4), (0, 2); 4 | 5 | /* 6 | 5 7 | */ 8 | 9 | -- Test without sorts/aggs. 10 | select * from t1; 11 | 12 | /* 13 | LogicalProjection { exprs: [ #0, #1 ] } 14 | └── LogicalScan { table: t1 } 15 | PhysicalScan { table: t1 } 16 | 0 0 17 | 1 1 18 | 5 2 19 | 2 4 20 | 0 2 21 | */ 22 | 23 | -- Test whether the optimizer handles duplicate sort expressions correctly. 
24 | select * from t1 order by v1, v2, v1 desc, v2 desc, v1 asc; 25 | 26 | /* 27 | LogicalSort 28 | ├── exprs: 29 | │ ┌── SortOrder { order: Asc } 30 | │ │ └── #0 31 | │ ├── SortOrder { order: Asc } 32 | │ │ └── #1 33 | │ ├── SortOrder { order: Desc } 34 | │ │ └── #0 35 | │ ├── SortOrder { order: Desc } 36 | │ │ └── #1 37 | │ └── SortOrder { order: Asc } 38 | │ └── #0 39 | └── LogicalProjection { exprs: [ #0, #1 ] } 40 | └── LogicalScan { table: t1 } 41 | PhysicalSort 42 | ├── exprs: 43 | │ ┌── SortOrder { order: Asc } 44 | │ │ └── #0 45 | │ └── SortOrder { order: Asc } 46 | │ └── #1 47 | └── PhysicalScan { table: t1 } 48 | 0 0 49 | 0 2 50 | 1 1 51 | 2 4 52 | 5 2 53 | */ 54 | 55 | -- Test whether the optimizer handles duplicate agg expressions correctly. 56 | select * from t1 group by v1, v2, v1; 57 | 58 | /* 59 | LogicalProjection { exprs: [ #0, #1 ] } 60 | └── LogicalAgg { exprs: [], groups: [ #0, #1, #0 ] } 61 | └── LogicalScan { table: t1 } 62 | PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } 63 | └── PhysicalScan { table: t1 } 64 | 0 0 65 | 1 1 66 | 5 2 67 | 2 4 68 | 0 2 69 | */ 70 | 71 | -- Test whether the optimizer handles duplicate sort and agg expressions correctly. 
72 | select * from t1 group by v1, v2, v1, v2, v2 order by v1, v2, v1 desc, v2 desc, v1 asc; 73 | 74 | /* 75 | LogicalSort 76 | ├── exprs: 77 | │ ┌── SortOrder { order: Asc } 78 | │ │ └── #0 79 | │ ├── SortOrder { order: Asc } 80 | │ │ └── #1 81 | │ ├── SortOrder { order: Desc } 82 | │ │ └── #0 83 | │ ├── SortOrder { order: Desc } 84 | │ │ └── #1 85 | │ └── SortOrder { order: Asc } 86 | │ └── #0 87 | └── LogicalProjection { exprs: [ #0, #1 ] } 88 | └── LogicalAgg { exprs: [], groups: [ #0, #1, #0, #1, #1 ] } 89 | └── LogicalScan { table: t1 } 90 | PhysicalSort 91 | ├── exprs: 92 | │ ┌── SortOrder { order: Asc } 93 | │ │ └── #0 94 | │ └── SortOrder { order: Asc } 95 | │ └── #1 96 | └── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } 97 | └── PhysicalScan { table: t1 } 98 | 0 0 99 | 0 2 100 | 1 1 101 | 2 4 102 | 5 2 103 | */ 104 | 105 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_duplicated_expr.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(v1 int, v2 int); 3 | insert into t1 values (0, 0), (1, 1), (5, 2), (2, 4), (0, 2); 4 | tasks: 5 | - execute 6 | - sql: | 7 | select * from t1; 8 | desc: Test without sorts/aggs. 9 | tasks: 10 | - explain:logical_optd,physical_optd 11 | - execute 12 | - sql: | 13 | select * from t1 order by v1, v2, v1 desc, v2 desc, v1 asc; 14 | desc: Test whether the optimizer handles duplicate sort expressions correctly. 15 | tasks: 16 | - explain:logical_optd,physical_optd 17 | - execute 18 | - sql: | 19 | select * from t1 group by v1, v2, v1; 20 | desc: Test whether the optimizer handles duplicate agg expressions correctly. 21 | tasks: 22 | - explain:logical_optd,physical_optd 23 | - execute 24 | - sql: | 25 | select * from t1 group by v1, v2, v1, v2, v2 order by v1, v2, v1 desc, v2 desc, v1 asc; 26 | desc: Test whether the optimizer handles duplicate sort and agg expressions correctly. 
27 | tasks: 28 | - explain:logical_optd,physical_optd 29 | - execute -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_limit.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | 7 | /* 8 | 3 9 | 3 10 | */ 11 | 12 | -- Test EliminateLimitRule (with 0 limit clause) 13 | select * from t1 LIMIT 0; 14 | 15 | /* 16 | LogicalLimit { skip: 0(i64), fetch: 0(i64) } 17 | └── LogicalProjection { exprs: [ #0, #1 ] } 18 | └── LogicalScan { table: t1 } 19 | PhysicalEmptyRelation { produce_one_row: false } 20 | */ 21 | 22 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_limit.yml: -------------------------------------------------------------------------------- 1 | 2 | - sql: | 3 | create table t1(t1v1 int, t1v2 int); 4 | create table t2(t2v1 int, t2v3 int); 5 | insert into t1 values (0, 0), (1, 1), (2, 2); 6 | insert into t2 values (0, 200), (1, 201), (2, 202); 7 | tasks: 8 | - execute 9 | - sql: | 10 | select * from t1 LIMIT 0; 11 | desc: Test EliminateLimitRule (with 0 limit clause) 12 | tasks: 13 | - explain:logical_optd,physical_optd 14 | - execute -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_proj.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(v1 int, v2 int); 3 | insert into t1 values (0, 0), (1, 1), (2, 2); 4 | create table t2(v0 int, v1 int, v2 int, v3 int); 5 | insert into t2 values (0, 0, 0, 0), (1, 1, 1, 1), (2, 2, 2, 2); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select v1 from (select v2, v1 from (select v1, v2 from 
t1 limit 5)); 10 | desc: Test MergeProjectRule with only the rule enabled 11 | tasks: 12 | - explain[logical_rules:project_merge_rule]:logical_optd,physical_optd 13 | - sql: | 14 | select v1 from (select v2, v1 from (select v1, v2 from t1 limit 5)); 15 | desc: Test EliminateProjectRule with only the rule enabled 16 | tasks: 17 | - explain[logical_rules:eliminate_project_rule]:logical_optd,physical_optd 18 | - sql: | 19 | select v1 from (select v2, v1 from (select v1, v2 from t1 limit 5)); 20 | desc: Test with all rules enabled 21 | tasks: 22 | - explain:logical_optd,physical_optd 23 | - execute 24 | - sql: | 25 | select v1 from (select v2, v1 from (select v1, v2 from t1 limit 5)); 26 | desc: Test with all rules enabled 27 | tasks: 28 | - explain:logical_optd,physical_optd 29 | - execute 30 | - sql: | 31 | select v0, v2, v1, v3 from (select v0 as v0, v2 as v1, v1 as v2, v3 from t2); 32 | desc: Test with all rules enabled 33 | tasks: 34 | - explain:logical_optd,physical_optd 35 | - execute 36 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/empty_relation.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | 7 | /* 8 | 3 9 | 3 10 | */ 11 | 12 | -- Test whether the optimizer handles empty relation (select single value) correctly. 13 | select 64 + 1; 14 | 15 | /* 16 | LogicalProjection 17 | ├── exprs:Add 18 | │ ├── 64(i64) 19 | │ └── 1(i64) 20 | └── LogicalEmptyRelation { produce_one_row: true } 21 | PhysicalProjection 22 | ├── exprs:Add 23 | │ ├── 64(i64) 24 | │ └── 1(i64) 25 | └── PhysicalEmptyRelation { produce_one_row: true } 26 | 65 27 | */ 28 | 29 | -- Test whether the optimizer handles select constant from table correctly. 
30 | select 64 + 1 from t1; 31 | 32 | /* 33 | LogicalProjection 34 | ├── exprs:Add 35 | │ ├── 64(i64) 36 | │ └── 1(i64) 37 | └── LogicalScan { table: t1 } 38 | PhysicalProjection 39 | ├── exprs:Add 40 | │ ├── 64(i64) 41 | │ └── 1(i64) 42 | └── PhysicalScan { table: t1 } 43 | 65 44 | 65 45 | 65 46 | */ 47 | 48 | -- Test whether the optimizer eliminates join to empty relation 49 | select * from t1 inner join t2 on false; 50 | 51 | /* 52 | LogicalProjection { exprs: [ #0, #1, #2, #3 ] } 53 | └── LogicalJoin { join_type: Inner, cond: false } 54 | ├── LogicalScan { table: t1 } 55 | └── LogicalScan { table: t2 } 56 | PhysicalEmptyRelation { produce_one_row: false } 57 | */ 58 | 59 | -- Test whether the optimizer eliminates join to empty relation 60 | select 64+1 from t1 inner join t2 on false; 61 | 62 | /* 63 | LogicalProjection 64 | ├── exprs:Add 65 | │ ├── 64(i64) 66 | │ └── 1(i64) 67 | └── LogicalJoin { join_type: Inner, cond: false } 68 | ├── LogicalScan { table: t1 } 69 | └── LogicalScan { table: t2 } 70 | PhysicalProjection 71 | ├── exprs:Add 72 | │ ├── 64(i64) 73 | │ └── 1(i64) 74 | └── PhysicalEmptyRelation { produce_one_row: false } 75 | */ 76 | 77 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/empty_relation.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select 64 + 1; 10 | desc: Test whether the optimizer handles empty relation (select single value) correctly. 11 | tasks: 12 | - explain:logical_optd,physical_optd 13 | - execute 14 | - sql: | 15 | select 64 + 1 from t1; 16 | desc: Test whether the optimizer handles select constant from table correctly. 
17 | tasks: 18 | - explain:logical_optd,physical_optd 19 | - execute 20 | - sql: | 21 | select * from t1 inner join t2 on false; 22 | desc: Test whether the optimizer eliminates join to empty relation 23 | tasks: 24 | - explain:logical_optd,physical_optd 25 | - execute 26 | - sql: | 27 | select 64+1 from t1 inner join t2 on false; 28 | desc: Test whether the optimizer eliminates join to empty relation 29 | tasks: 30 | - explain:logical_optd,physical_optd 31 | - execute 32 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/verbose.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(v1 int); 3 | insert into t1 values (0), (1), (2), (3); 4 | 5 | /* 6 | 4 7 | */ 8 | 9 | -- Test non-verbose explain 10 | select * from t1; 11 | 12 | /* 13 | PhysicalScan { table: t1 } 14 | */ 15 | 16 | -- Test verbose explain 17 | select * from t1; 18 | 19 | /* 20 | PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } 21 | */ 22 | 23 | -- Test verbose explain with aggregation 24 | select count(*) from t1; 25 | 26 | /* 27 | PhysicalAgg 28 | ├── aggrs:Agg(Count) 29 | │ └── [ 1(i64) ] 30 | ├── groups: [] 31 | ├── cost: {compute=5000,io=1000} 32 | ├── stat: {row_cnt=1000} 33 | └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } 34 | */ 35 | 36 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/verbose.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(v1 int); 3 | insert into t1 values (0), (1), (2), (3); 4 | tasks: 5 | - execute 6 | - sql: | 7 | select * from t1; 8 | desc: Test non-verbose explain 9 | tasks: 10 | - explain:physical_optd 11 | - sql: | 12 | select * from t1; 13 | desc: Test verbose explain 14 | tasks: 15 | - explain[verbose]:physical_optd 16 | 
- sql: | 17 | select count(*) from t1; 18 | desc: Test verbose explain with aggregation 19 | tasks: 20 | - explain[verbose]:physical_optd 21 | 22 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/expressions/redundant_exprs.planner.sql: -------------------------------------------------------------------------------- 1 | -- Setup Test Table 2 | CREATE TABLE xxx (a INTEGER, b INTEGER); 3 | INSERT INTO xxx VALUES (0, 0), (1, 1), (2, 2); 4 | SELECT * FROM xxx WHERE a = 0; 5 | 6 | /* 7 | 3 8 | 0 0 9 | */ 10 | 11 | -- (no id or description) 12 | SELECT * FROM xxx WHERE a + 0 = b + 0; 13 | 14 | /* 15 | 0 0 16 | 1 1 17 | 2 2 18 | 19 | LogicalProjection { exprs: [ #0, #1 ] } 20 | └── LogicalFilter 21 | ├── cond:Eq 22 | │ ├── Add 23 | │ │ ├── Cast { cast_to: Int64, child: #0 } 24 | │ │ └── 0(i64) 25 | │ └── Add 26 | │ ├── Cast { cast_to: Int64, child: #1 } 27 | │ └── 0(i64) 28 | └── LogicalScan { table: xxx } 29 | PhysicalFilter 30 | ├── cond:Eq 31 | │ ├── Add 32 | │ │ ├── Cast { cast_to: Int64, child: #0 } 33 | │ │ └── 0(i64) 34 | │ └── Add 35 | │ ├── Cast { cast_to: Int64, child: #1 } 36 | │ └── 0(i64) 37 | └── PhysicalScan { table: xxx } 38 | */ 39 | 40 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/expressions/redundant_exprs.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | CREATE TABLE xxx (a INTEGER, b INTEGER); 3 | INSERT INTO xxx VALUES (0, 0), (1, 1), (2, 2); 4 | SELECT * FROM xxx WHERE a = 0; 5 | tasks: 6 | - execute 7 | desc: Setup Test Table 8 | - sql: | 9 | SELECT * FROM xxx WHERE a + 0 = b + 0; 10 | tasks: 11 | - execute 12 | - explain:logical_optd,physical_optd -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/joins/join_enumerate.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 
| create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | create table t3(t3v2 int, t3v4 int); 5 | insert into t1 values (0, 0), (1, 1), (2, 2); 6 | insert into t2 values (0, 200), (1, 201), (2, 202); 7 | insert into t3 values (0, 300), (1, 301), (2, 302); 8 | tasks: 9 | - execute 10 | - sql: | 11 | select * from t2, t1 where t1v1 = t2v1; 12 | desc: Test whether the optimizer enumerates all 2-join orders. 13 | tasks: 14 | - explain[disable_pruning]:logical_join_orders 15 | - explain:logical_join_orders 16 | - execute 17 | - sql: | 18 | select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2; 19 | desc: Test whether the optimizer enumerates all 3-join orders. (It should) 20 | tasks: 21 | - explain[disable_pruning]:logical_join_orders 22 | - explain:logical_join_orders 23 | - execute 24 | - sql: | 25 | select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2; 26 | desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently) 27 | tasks: 28 | - explain[disable_pruning]:logical_join_orders 29 | - explain:logical_join_orders 30 | - execute 31 | - sql: | 32 | select * from t1, (select * from t2, t3) where t1v1 = t2v1 and t1v2 = t3v2; 33 | desc: Test whether the optimizer enumerates all 3-join orders. 
(It don't currently) 34 | tasks: 35 | - explain[disable_pruning]:logical_join_orders,physical_optd 36 | - explain:logical_join_orders,physical_optd 37 | - execute 38 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/joins/multi-join.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(a int, b int); 3 | create table t2(c int, d int); 4 | create table t3(e int, f int); 5 | create table t4(g int, h int); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1, t2, t3 where a = c AND d = e; 10 | desc: test 3-way join 11 | tasks: 12 | - explain:logical_optd,physical_optd 13 | - sql: | 14 | select * from t1, t2, t3 where a = c AND b = e; 15 | desc: test 3-way join 16 | tasks: 17 | - explain:logical_optd,physical_optd 18 | - sql: | 19 | select * from t1, t2, t3, t4 where a = c AND b = e AND f = g; 20 | desc: test 4-way join 21 | tasks: 22 | - explain:logical_optd,physical_optd 23 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/joins/self-join.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | 7 | /* 8 | 3 9 | 3 10 | */ 11 | 12 | -- test self join 13 | select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1; 14 | 15 | /* 16 | (Join t1 t1) 17 | 18 | LogicalSort 19 | ├── exprs:SortOrder { order: Asc } 20 | │ └── #0 21 | └── LogicalProjection { exprs: [ #0, #1, #2, #3 ] } 22 | └── LogicalFilter 23 | ├── cond:Eq 24 | │ ├── #0 25 | │ └── #2 26 | └── LogicalJoin { join_type: Inner, cond: true } 27 | ├── LogicalScan { table: t1 } 28 | └── LogicalScan { table: t1 } 29 | PhysicalSort 30 | ├── exprs:SortOrder { order: Asc 
} 31 | │ └── #0 32 | └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } 33 | ├── PhysicalScan { table: t1 } 34 | └── PhysicalScan { table: t1 } 35 | 0 0 0 0 36 | 1 1 1 1 37 | 2 2 2 2 38 | */ 39 | 40 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/joins/self-join.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1; 10 | desc: test self join 11 | tasks: 12 | - explain:logical_join_orders,logical_optd,physical_optd 13 | - execute 14 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/planner_test.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
use std::path::Path;

use anyhow::Result;

/// Test-harness entry point (harness = false): runs every planner test found
/// under this crate's `tests/` directory against a freshly constructed
/// `DatafusionDBMS`.
fn main() -> Result<()> {
    let tests_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests");
    sqlplannertest::planner_test_runner(tests_dir, || async {
        optd_sqlplannertest::DatafusionDBMS::new().await
    })?;
    Ok(())
}
-------------------------------------------------------------------------------- /optd-sqlplannertest/tests/pushdowns/fliter_transpose.planner.sql: --------------------------------------------------------------------------------
-- (no id or description)
create table t1(t1v1 int, t1v2 int);
create table t2(t2v1 int, t2v3 int);
insert into t1 values (0, 0), (1, 1), (2, 2);
insert into t2 values (0, 200), (1, 201), (2, 202);

/*
3
3
*/

-- Test whether we can transpose filter and projection
SELECT t1.t1v1, t1.t1v2, t2.t2v3
FROM t1, t2
WHERE t1.t1v1 = t2.t2v1;

/*
LogicalProjection { exprs: [ #0, #1, #3 ] }
└── LogicalFilter
    ├── cond:Eq
    │   ├── #0
    │   └── #2
    └── LogicalJoin { join_type: Inner, cond: true }
        ├── LogicalScan { table: t1 }
        └── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #1, #3 ] }
└── PhysicalFilter
    ├── cond:Eq
    │   ├── #0
    │   └── #2
    └── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
        ├── PhysicalScan { table: t1 }
        └── PhysicalScan { table: t2 }
*/

-- Test whether we can transpose filter and projection
SELECT t1.t1v1, t1.t1v2, t2.t2v3
FROM t1, t2
WHERE t1.t1v1 = t2.t2v3;

/*
LogicalProjection { exprs: [ #0, #1, #3 ] }
└── LogicalFilter
    ├── cond:Eq
    │   ├── #0
    │   └── #3
    └── LogicalJoin { join_type: Inner, cond: true }
        ├── LogicalScan { table: t1 }
        └── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #1, #3 ] }
└── PhysicalFilter
    ├── cond:Eq
    │   ├── #0
    │   └── #3
    └── PhysicalNestedLoopJoin {
join_type: Inner, cond: true } 56 | ├── PhysicalScan { table: t1 } 57 | └── PhysicalScan { table: t2 } 58 | */ 59 | 60 | -- Test whether we can transpose filter and projection 61 | SELECT * FROM ( 62 | SELECT t1.t1v1, t1.t1v2, t2.t2v3 FROM t1, t2 63 | ) WHERE t1.t1v1 = t2.t2v3; 64 | 65 | /* 66 | LogicalProjection { exprs: [ #0, #1, #2 ] } 67 | └── LogicalFilter 68 | ├── cond:Eq 69 | │ ├── #0 70 | │ └── #2 71 | └── LogicalProjection { exprs: [ #0, #1, #3 ] } 72 | └── LogicalJoin { join_type: Inner, cond: true } 73 | ├── LogicalScan { table: t1 } 74 | └── LogicalScan { table: t2 } 75 | PhysicalProjection { exprs: [ #0, #1, #3 ] } 76 | └── PhysicalFilter 77 | ├── cond:Eq 78 | │ ├── #0 79 | │ └── #3 80 | └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } 81 | ├── PhysicalScan { table: t1 } 82 | └── PhysicalScan { table: t2 } 83 | */ 84 | 85 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/pushdowns/fliter_transpose.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | SELECT t1.t1v1, t1.t1v2, t2.t2v3 10 | FROM t1, t2 11 | WHERE t1.t1v1 = t2.t2v1; 12 | desc: Test whether we can transpose filter and projection 13 | tasks: 14 | - explain[logical_rules:filter_project_transpose_rule+project_filter_transpose_rule+project_merge_rule,panic_on_budget]:logical_optd,physical_optd 15 | - sql: | 16 | SELECT t1.t1v1, t1.t1v2, t2.t2v3 17 | FROM t1, t2 18 | WHERE t1.t1v1 = t2.t2v3; 19 | desc: Test whether we can transpose filter and projection 20 | tasks: 21 | - explain[logical_rules:filter_project_transpose_rule+project_filter_transpose_rule+project_merge_rule,panic_on_budget]:logical_optd,physical_optd 22 | - sql: | 23 | SELECT * FROM ( 24 | 
SELECT t1.t1v1, t1.t1v2, t2.t2v3 FROM t1, t2 25 | ) WHERE t1.t1v1 = t2.t2v3; 26 | desc: Test whether we can transpose filter and projection 27 | tasks: 28 | - explain[logical_rules:filter_project_transpose_rule+project_filter_transpose_rule+project_merge_rule,panic_on_budget]:logical_optd,physical_optd 29 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/subqueries/subquery_unnesting.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | create table t3(t3v2 int, t3v4 int); 5 | tasks: 6 | - execute 7 | # - sql: | 8 | # select * from t1 where t1v1 in (select t2v1 from t2); 9 | # desc: Test whether the optimizer can unnest "in" subqueries. -- failing with unsupported expression 10 | # tasks: 11 | # - explain_logical 12 | - sql: | 13 | select * from t1 where (select sum(t2v3) from t2 where t2v1 = t1v1) > 100; 14 | desc: Test whether the optimizer can unnest correlated subqueries with (scalar op agg) 15 | tasks: 16 | - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 17 | - sql: | 18 | select * from t1 where (select sum(sumt2v3) from (select t2v1, sum(t2v3) as sumt2v3 from t2 where t2v1 = t1v1 group by t2v1)) > 100; 19 | desc: Test whether the optimizer can unnest correlated subqueries with (scalar op group agg) 20 | tasks: 21 | - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 22 | - sql: | 23 | select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1; 24 | desc: Test whether the optimizer can unnest correlated subqueries with scalar agg in select list 25 | tasks: 26 | - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 27 | # - sql: | 28 | # select * from t1 where exists (select * from t2 where t2v1 = t1v1); 29 | # desc: Test whether the optimizer can unnest correlated subqueries with exists 30 | # tasks: 31 | # 
- explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 32 | # todo: a test case on quantifier (any/all) 33 | - sql: | 34 | select * from t1 where (select sum(t2v3) from (select * from t2, t3 where t2v1 = t1v1 and t2v3 = t3v2)) > 100; 35 | desc: Test whether the optimizer can unnest correlated subqueries. 36 | tasks: 37 | - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 38 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/bench_populate.sql: -------------------------------------------------------------------------------- 1 | -- A special version of DDL/DML for populating the TPC-H tables, sf=0.01 2 | 3 | CREATE EXTERNAL TABLE customer_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/customer.csv'; 4 | insert into customer select column_1, column_2, column_3, column_4, column_5, column_6, column_7, column_8 from customer_tbl; 5 | CREATE EXTERNAL TABLE lineitem_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/lineitem.csv'; 6 | insert into lineitem select column_1, column_2, column_3, column_4, column_5, column_6, column_7, column_8, column_9, column_10, column_11, column_12, column_13, column_14, column_15, column_16 from lineitem_tbl; 7 | CREATE EXTERNAL TABLE nation_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/nation.csv'; 8 | insert into nation select column_1, column_2, column_3, column_4 from nation_tbl; 9 | CREATE EXTERNAL TABLE orders_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/orders.csv'; 10 | insert into orders select column_1, column_2, column_3, column_4, column_5, column_6, column_7, column_8, column_9 from orders_tbl; 11 | CREATE EXTERNAL TABLE part_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION 
'../datafusion-optd-cli/tpch-sf0_01/part.csv'; 12 | insert into part select column_1, column_2, column_3, column_4, column_5, column_6, column_7, column_8, column_9 from part_tbl; 13 | CREATE EXTERNAL TABLE partsupp_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/partsupp.csv'; 14 | insert into partsupp select column_1, column_2, column_3, column_4, column_5 from partsupp_tbl; 15 | CREATE EXTERNAL TABLE region_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/region.csv'; 16 | insert into region select column_1, column_2, column_3 from region_tbl; 17 | CREATE EXTERNAL TABLE supplier_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/supplier.csv'; 18 | insert into supplier select column_1, column_2, column_3, column_4, column_5, column_6, column_7 from supplier_tbl; 19 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q1.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | l_returnflag, 4 | l_linestatus, 5 | sum(l_quantity) as sum_qty, 6 | sum(l_extendedprice) as sum_base_price, 7 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 8 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 9 | avg(l_quantity) as avg_qty, 10 | avg(l_extendedprice) as avg_price, 11 | avg(l_discount) as avg_disc, 12 | count(*) as count_order 13 | FROM 14 | lineitem 15 | WHERE 16 | l_shipdate <= date '1998-12-01' - interval '90' day 17 | GROUP BY 18 | l_returnflag, l_linestatus 19 | ORDER BY 20 | l_returnflag, l_linestatus; 21 | desc: TPC-H Q1 22 | before: ["include_sql:schema.sql"] 23 | tasks: 24 | - explain:logical_optd,physical_optd 25 | - bench 26 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q10.yml: 
-------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | c_custkey, 4 | c_name, 5 | sum(l_extendedprice * (1 - l_discount)) as revenue, 6 | c_acctbal, 7 | n_name, 8 | c_address, 9 | c_phone, 10 | c_comment 11 | FROM 12 | customer, 13 | orders, 14 | lineitem, 15 | nation 16 | WHERE 17 | c_custkey = o_custkey 18 | AND l_orderkey = o_orderkey 19 | AND o_orderdate >= DATE '1993-07-01' 20 | AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH 21 | AND l_returnflag = 'R' 22 | AND c_nationkey = n_nationkey 23 | GROUP BY 24 | c_custkey, 25 | c_name, 26 | c_acctbal, 27 | c_phone, 28 | n_name, 29 | c_address, 30 | c_comment 31 | ORDER BY 32 | revenue DESC 33 | LIMIT 20; 34 | desc: TPC-H Q10 35 | before: ["include_sql:schema.sql"] 36 | tasks: 37 | - explain:logical_optd,physical_optd 38 | - bench 39 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q11.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | ps_partkey, 4 | sum(ps_supplycost * ps_availqty) as value 5 | from 6 | partsupp, 7 | supplier, 8 | nation 9 | where 10 | ps_suppkey = s_suppkey 11 | and s_nationkey = n_nationkey 12 | and n_name = 'CHINA' 13 | group by 14 | ps_partkey having 15 | sum(ps_supplycost * ps_availqty) > ( 16 | select 17 | sum(ps_supplycost * ps_availqty) * 0.0001000000 18 | from 19 | partsupp, 20 | supplier, 21 | nation 22 | where 23 | ps_suppkey = s_suppkey 24 | and s_nationkey = n_nationkey 25 | and n_name = 'CHINA' 26 | ) 27 | order by 28 | value desc; 29 | desc: TPC-H Q11 30 | before: ["include_sql:schema.sql"] 31 | tasks: 32 | - explain:logical_optd,physical_optd 33 | - bench 34 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q12.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | 
l_shipmode, 4 | sum(case when o_orderpriority = '1-URGENT' 5 | or o_orderpriority = '2-HIGH' 6 | then 1 else 0 end) as high_priority_orders, 7 | sum(case when o_orderpriority <> '1-URGENT' 8 | and o_orderpriority <> '2-HIGH' 9 | then 1 else 0 end) as low_priority_orders 10 | FROM 11 | orders, 12 | lineitem 13 | WHERE 14 | o_orderkey = l_orderkey 15 | AND l_shipmode in ('MAIL', 'SHIP') 16 | AND l_commitdate < l_receiptdate 17 | AND l_shipdate < l_commitdate 18 | AND l_receiptdate >= DATE '1994-01-01' 19 | AND l_receiptdate < DATE '1995-01-01' 20 | GROUP BY 21 | l_shipmode 22 | ORDER BY 23 | l_shipmode; 24 | desc: TPC-H Q12 25 | before: ["include_sql:schema.sql"] 26 | tasks: 27 | - explain:logical_optd,physical_optd 28 | - bench 29 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q13.planner.sql: -------------------------------------------------------------------------------- 1 | -- TPC-H Q13 2 | select 3 | c_count, 4 | count(*) as custdist 5 | from 6 | ( 7 | select 8 | c_custkey, 9 | count(o_orderkey) 10 | from 11 | customer left outer join orders on 12 | c_custkey = o_custkey 13 | and o_comment not like '%special%requests%' 14 | group by 15 | c_custkey 16 | ) as c_orders (c_custkey, c_count) 17 | group by 18 | c_count 19 | order by 20 | custdist desc, 21 | c_count desc; 22 | 23 | /* 24 | LogicalSort 25 | ├── exprs: 26 | │ ┌── SortOrder { order: Desc } 27 | │ │ └── #1 28 | │ └── SortOrder { order: Desc } 29 | │ └── #0 30 | └── LogicalProjection { exprs: [ #0, #1 ] } 31 | └── LogicalAgg 32 | ├── exprs:Agg(Count) 33 | │ └── [ 1(i64) ] 34 | ├── groups: [ #1 ] 35 | └── LogicalProjection { exprs: [ #0, #1 ] } 36 | └── LogicalProjection { exprs: [ #0, #1 ] } 37 | └── LogicalAgg 38 | ├── exprs:Agg(Count) 39 | │ └── [ #8 ] 40 | ├── groups: [ #0 ] 41 | └── LogicalJoin 42 | ├── join_type: LeftOuter 43 | ├── cond:And 44 | │ ├── Eq 45 | │ │ ├── #0 46 | │ │ └── #9 47 | │ └── Like { expr: #16, pattern: 
"%special%requests%", negated: true, case_insensitive: false } 48 | ├── LogicalScan { table: customer } 49 | └── LogicalScan { table: orders } 50 | PhysicalSort 51 | ├── exprs: 52 | │ ┌── SortOrder { order: Desc } 53 | │ │ └── #1 54 | │ └── SortOrder { order: Desc } 55 | │ └── #0 56 | └── PhysicalAgg 57 | ├── aggrs:Agg(Count) 58 | │ └── [ 1(i64) ] 59 | ├── groups: [ #1 ] 60 | └── PhysicalAgg 61 | ├── aggrs:Agg(Count) 62 | │ └── [ #8 ] 63 | ├── groups: [ #0 ] 64 | └── PhysicalNestedLoopJoin 65 | ├── join_type: LeftOuter 66 | ├── cond:And 67 | │ ├── Eq 68 | │ │ ├── #0 69 | │ │ └── #9 70 | │ └── Like { expr: #16, pattern: "%special%requests%", negated: true, case_insensitive: false } 71 | ├── PhysicalScan { table: customer } 72 | └── PhysicalScan { table: orders } 73 | */ 74 | 75 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q13.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | c_count, 4 | count(*) as custdist 5 | from 6 | ( 7 | select 8 | c_custkey, 9 | count(o_orderkey) 10 | from 11 | customer left outer join orders on 12 | c_custkey = o_custkey 13 | and o_comment not like '%special%requests%' 14 | group by 15 | c_custkey 16 | ) as c_orders (c_custkey, c_count) 17 | group by 18 | c_count 19 | order by 20 | custdist desc, 21 | c_count desc; 22 | desc: TPC-H Q13 23 | before: ["include_sql:schema.sql"] 24 | tasks: 25 | - explain:logical_optd,physical_optd 26 | - bench 27 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q14.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | 100.00 * sum(case when p_type like 'PROMO%' 4 | then l_extendedprice * (1 - l_discount) 5 | else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 6 | FROM 7 | lineitem, 8 | part 9 | WHERE 10 | l_partkey = p_partkey 11 | AND 
l_shipdate >= DATE '1995-09-01' 12 | AND l_shipdate < DATE '1995-09-01' + INTERVAL '1' MONTH; 13 | desc: TPC-H Q14 14 | before: ["include_sql:schema.sql"] 15 | tasks: 16 | - explain:logical_optd,physical_optd 17 | - bench 18 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q15.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | WITH revenue0 (supplier_no, total_revenue) AS 3 | ( 4 | SELECT 5 | l_suppkey, 6 | SUM(l_extendedprice * (1 - l_discount)) 7 | FROM 8 | lineitem 9 | WHERE 10 | l_shipdate >= DATE '1993-01-01' 11 | AND l_shipdate < DATE '1993-01-01' + INTERVAL '3' MONTH 12 | GROUP BY 13 | l_suppkey 14 | ) 15 | SELECT 16 | s_suppkey, 17 | s_name, 18 | s_address, 19 | s_phone, 20 | total_revenue 21 | FROM 22 | supplier, 23 | revenue0 24 | WHERE 25 | s_suppkey = supplier_no 26 | AND total_revenue = 27 | ( 28 | SELECT 29 | MAX(total_revenue) 30 | FROM 31 | revenue0 32 | ) 33 | ORDER BY 34 | s_suppkey; 35 | desc: TPC-H Q15 36 | before: ["include_sql:schema.sql"] 37 | tasks: 38 | - explain:logical_optd,physical_optd 39 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q16.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | p_brand, 4 | p_type, 5 | p_size, 6 | count(distinct ps_suppkey) as supplier_cnt 7 | from 8 | partsupp, 9 | part 10 | where 11 | p_partkey = ps_partkey 12 | and p_brand <> 'Brand#45' 13 | and p_type not like 'MEDIUM POLISHED%' 14 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9) 15 | and ps_suppkey not in ( 16 | select 17 | s_suppkey 18 | from 19 | supplier 20 | where 21 | s_comment like '%Customer%Complaints%' 22 | ) 23 | group by 24 | p_brand, 25 | p_type, 26 | p_size 27 | order by 28 | supplier_cnt desc, 29 | p_brand, 30 | p_type, 31 | p_size; 32 | desc: TPC-H Q16 33 | before: ["include_sql:schema.sql"] 34 | 
tasks: 35 | - explain:logical_optd,physical_optd 36 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q17.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | ROUND(SUM(l_extendedprice) / 7.0, 16) AS avg_yearly 4 | FROM 5 | lineitem, 6 | part 7 | WHERE 8 | p_partkey = l_partkey 9 | AND p_brand = 'Brand#13' 10 | AND p_container = 'JUMBO PKG' 11 | AND l_quantity < ( 12 | SELECT 13 | 0.2 * AVG(l_quantity) 14 | FROM 15 | lineitem 16 | WHERE 17 | l_partkey = p_partkey 18 | ); 19 | desc: TPC-H Q17 20 | before: ["include_sql:schema.sql"] 21 | tasks: 22 | - explain:logical_optd,physical_optd 23 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q18.yml.disabled: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | c_name, 4 | c_custkey, 5 | o_orderkey, 6 | o_orderdate, 7 | o_totalprice, 8 | sum(l_quantity) 9 | from 10 | customer, 11 | orders, 12 | lineitem 13 | where 14 | o_orderkey in ( 15 | select 16 | l_orderkey 17 | from 18 | lineitem 19 | group by 20 | l_orderkey having 21 | sum(l_quantity) > 250 -- original: 300 22 | ) 23 | and c_custkey = o_custkey 24 | and o_orderkey = l_orderkey 25 | group by 26 | c_name, 27 | c_custkey, 28 | o_orderkey, 29 | o_orderdate, 30 | o_totalprice 31 | order by 32 | o_totalprice desc, 33 | o_orderdate 34 | limit 100; 35 | desc: TPC-H Q18 36 | before: ["include_sql:schema.sql"] 37 | tasks: 38 | - explain:logical_optd,physical_optd 39 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q19.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | sum(l_extendedprice* (1 - l_discount)) as revenue 4 | FROM 5 | lineitem, 6 | part 7 | WHERE 8 | ( 9 | p_partkey = l_partkey 10 | AND 
p_brand = 'Brand#12' 11 | AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 12 | AND l_quantity >= 1 AND l_quantity <= 11 13 | AND p_size BETWEEN 1 AND 5 14 | AND l_shipmode IN ('AIR', 'AIR REG') 15 | AND l_shipinstruct = 'DELIVER IN PERSON' 16 | ) OR ( 17 | p_partkey = l_partkey 18 | AND p_brand = 'Brand#23' 19 | AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 20 | AND l_quantity >= 10 AND l_quantity <= 20 21 | AND p_size BETWEEN 1 AND 10 22 | AND l_shipmode IN ('AIR', 'AIR REG') 23 | AND l_shipinstruct = 'DELIVER IN PERSON' 24 | ) OR ( 25 | p_partkey = l_partkey 26 | AND p_brand = 'Brand#34' 27 | AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 28 | AND l_quantity >= 20 AND l_quantity <= 30 29 | AND p_size BETWEEN 1 AND 15 30 | AND l_shipmode IN ('AIR', 'AIR REG') 31 | AND l_shipinstruct = 'DELIVER IN PERSON' 32 | ) 33 | desc: TPC-H Q19 34 | before: ["include_sql:schema.sql"] 35 | tasks: 36 | - explain:logical_optd,physical_optd 37 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q2.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | s_acctbal, 4 | s_name, 5 | n_name, 6 | p_partkey, 7 | p_mfgr, 8 | s_address, 9 | s_phone, 10 | s_comment 11 | from 12 | part, 13 | supplier, 14 | partsupp, 15 | nation, 16 | region 17 | where 18 | p_partkey = ps_partkey 19 | and s_suppkey = ps_suppkey 20 | and p_size = 4 21 | and p_type like '%TIN' 22 | and s_nationkey = n_nationkey 23 | and n_regionkey = r_regionkey 24 | and r_name = 'AFRICA' 25 | and ps_supplycost = ( 26 | select 27 | min(ps_supplycost) 28 | from 29 | partsupp, 30 | supplier, 31 | nation, 32 | region 33 | where 34 | p_partkey = ps_partkey 35 | and s_suppkey = ps_suppkey 36 | and s_nationkey = n_nationkey 37 | and n_regionkey = r_regionkey 38 | and r_name = 'AFRICA' 39 | ) 40 | order by 41 | s_acctbal desc, 42 | n_name, 43 | s_name, 44 | p_partkey 
45 | limit 100; 46 | desc: TPC-H Q2 47 | before: ["include_sql:schema.sql"] 48 | tasks: 49 | - explain:logical_optd,optimized_logical_optd,physical_optd 50 | 51 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q20.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | s_name, 4 | s_address 5 | from 6 | supplier, 7 | nation 8 | where 9 | s_suppkey in ( 10 | select 11 | ps_suppkey 12 | from 13 | partsupp 14 | where 15 | ps_partkey in ( 16 | select 17 | p_partkey 18 | from 19 | part 20 | where 21 | p_name like 'indian%' 22 | ) 23 | and ps_availqty > ( 24 | select 25 | 0.5 * sum(l_quantity) 26 | from 27 | lineitem 28 | where 29 | l_partkey = ps_partkey 30 | and l_suppkey = ps_suppkey 31 | and l_shipdate >= date '1996-01-01' 32 | and l_shipdate < date '1996-01-01' + interval '1' year 33 | ) 34 | ) 35 | and s_nationkey = n_nationkey 36 | and n_name = 'IRAQ' 37 | order by 38 | s_name; 39 | desc: TPC-H Q20 40 | before: ["include_sql:schema.sql"] 41 | tasks: 42 | - explain:logical_optd,physical_optd 43 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q21.yml.disabled: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | s_name, 4 | count(*) as numwait 5 | from 6 | supplier, 7 | lineitem l1, 8 | orders, 9 | nation 10 | where 11 | s_suppkey = l1.l_suppkey 12 | and o_orderkey = l1.l_orderkey 13 | and o_orderstatus = 'F' 14 | and l1.l_receiptdate > l1.l_commitdate 15 | and exists ( 16 | select 17 | * 18 | from 19 | lineitem l2 20 | where 21 | l2.l_orderkey = l1.l_orderkey 22 | and l2.l_suppkey <> l1.l_suppkey 23 | ) 24 | and not exists ( 25 | select 26 | * 27 | from 28 | lineitem l3 29 | where 30 | l3.l_orderkey = l1.l_orderkey 31 | and l3.l_suppkey <> l1.l_suppkey 32 | and l3.l_receiptdate > l3.l_commitdate 33 | ) 34 | and s_nationkey 
= n_nationkey 35 | and n_name = 'SAUDI ARABIA' 36 | group by 37 | s_name 38 | order by 39 | numwait desc, 40 | s_name 41 | limit 100; 42 | desc: TPC-H Q21 43 | before: ["include_sql:schema.sql"] 44 | tasks: 45 | - explain:logical_optd,physical_optd 46 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q22.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | cntrycode, 4 | count(*) as numcust, 5 | sum(c_acctbal) as totacctbal 6 | from 7 | ( 8 | select 9 | substring(c_phone from 1 for 2) as cntrycode, 10 | c_acctbal 11 | from 12 | customer 13 | where 14 | substring(c_phone from 1 for 2) in 15 | ('13', '31', '23', '29', '30', '18', '17') 16 | and c_acctbal > ( 17 | select 18 | avg(c_acctbal) 19 | from 20 | customer 21 | where 22 | c_acctbal > 0.00 23 | and substring(c_phone from 1 for 2) in 24 | ('13', '31', '23', '29', '30', '18', '17') 25 | ) 26 | and not exists ( 27 | select 28 | * 29 | from 30 | orders 31 | where 32 | o_custkey = c_custkey 33 | ) 34 | ) as custsale 35 | group by 36 | cntrycode 37 | order by 38 | cntrycode; 39 | desc: TPC-H Q22 40 | before: ["include_sql:schema.sql"] 41 | tasks: 42 | - explain:logical_optd,physical_optd 43 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q3.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | l_orderkey, 4 | SUM(l_extendedprice * (1 - l_discount)) AS revenue, 5 | o_orderdate, 6 | o_shippriority 7 | FROM 8 | customer, 9 | orders, 10 | lineitem 11 | WHERE 12 | c_mktsegment = 'FURNITURE' 13 | AND c_custkey = o_custkey 14 | AND l_orderkey = o_orderkey 15 | AND o_orderdate < DATE '1995-03-29' 16 | AND l_shipdate > DATE '1995-03-29' 17 | GROUP BY 18 | l_orderkey, 19 | o_orderdate, 20 | o_shippriority 21 | ORDER BY 22 | revenue DESC, 23 | o_orderdate LIMIT 10; 24 | desc: 
TPC-H Q3 25 | before: ["include_sql:schema.sql"] 26 | tasks: 27 | - explain:logical_optd,physical_optd 28 | - bench 29 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q4.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | o_orderpriority, 4 | count(*) as order_count 5 | from 6 | orders 7 | where 8 | o_orderdate >= date '1993-07-01' 9 | and o_orderdate < date '1993-07-01' + interval '3' month 10 | and exists ( 11 | select 12 | * 13 | from 14 | lineitem 15 | where 16 | l_orderkey = o_orderkey 17 | and l_commitdate < l_receiptdate 18 | ) 19 | group by 20 | o_orderpriority 21 | order by 22 | o_orderpriority; 23 | desc: TPC-H Q4 24 | before: ["include_sql:schema.sql"] 25 | tasks: 26 | - explain:logical_optd,physical_optd 27 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q5.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | n_name AS nation, 4 | SUM(l_extendedprice * (1 - l_discount)) AS revenue 5 | FROM 6 | customer, 7 | orders, 8 | lineitem, 9 | supplier, 10 | nation, 11 | region 12 | WHERE 13 | c_custkey = o_custkey 14 | AND l_orderkey = o_orderkey 15 | AND l_suppkey = s_suppkey 16 | AND c_nationkey = s_nationkey 17 | AND s_nationkey = n_nationkey 18 | AND n_regionkey = r_regionkey 19 | AND r_name = 'Asia' -- Specified region 20 | AND o_orderdate >= DATE '2023-01-01' 21 | AND o_orderdate < DATE '2024-01-01' 22 | GROUP BY 23 | n_name 24 | ORDER BY 25 | revenue DESC; 26 | desc: TPC-H Q5 27 | before: ["include_sql:schema.sql"] 28 | tasks: 29 | - explain:logical_optd,physical_optd 30 | - bench 31 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q6.planner.sql: -------------------------------------------------------------------------------- 1 | -- TPC-H 
Q6 2 | SELECT 3 | SUM(l_extendedprice * l_discount) AS revenue_loss 4 | FROM 5 | lineitem 6 | WHERE 7 | l_shipdate >= DATE '2023-01-01' 8 | AND l_shipdate < DATE '2024-01-01' 9 | AND l_discount BETWEEN 0.05 AND 0.07 10 | AND l_quantity < 24; 11 | 12 | /* 13 | LogicalProjection { exprs: [ #0 ] } 14 | └── LogicalAgg 15 | ├── exprs:Agg(Sum) 16 | │ └── Mul 17 | │ ├── #5 18 | │ └── #6 19 | ├── groups: [] 20 | └── LogicalFilter 21 | ├── cond:And 22 | │ ├── Geq 23 | │ │ ├── #10 24 | │ │ └── Cast { cast_to: Date32, child: "2023-01-01" } 25 | │ ├── Lt 26 | │ │ ├── #10 27 | │ │ └── Cast { cast_to: Date32, child: "2024-01-01" } 28 | │ ├── Between { child: Cast { cast_to: Decimal128(30, 15), child: #6 }, lower: Cast { cast_to: Decimal128(30, 15), child: 0.05(float) }, upper: Cast { cast_to: Decimal128(30, 15), child: 0.07(float) } } 29 | │ └── Lt 30 | │ ├── Cast { cast_to: Decimal128(22, 2), child: #4 } 31 | │ └── Cast { cast_to: Decimal128(22, 2), child: 24(i64) } 32 | └── LogicalScan { table: lineitem } 33 | PhysicalAgg 34 | ├── aggrs:Agg(Sum) 35 | │ └── Mul 36 | │ ├── #5 37 | │ └── #6 38 | ├── groups: [] 39 | └── PhysicalFilter 40 | ├── cond:And 41 | │ ├── Geq 42 | │ │ ├── #10 43 | │ │ └── Cast { cast_to: Date32, child: "2023-01-01" } 44 | │ ├── Lt 45 | │ │ ├── #10 46 | │ │ └── Cast { cast_to: Date32, child: "2024-01-01" } 47 | │ ├── Between { child: Cast { cast_to: Decimal128(30, 15), child: #6 }, lower: Cast { cast_to: Decimal128(30, 15), child: 0.05(float) }, upper: Cast { cast_to: Decimal128(30, 15), child: 0.07(float) } } 48 | │ └── Lt 49 | │ ├── Cast { cast_to: Decimal128(22, 2), child: #4 } 50 | │ └── Cast { cast_to: Decimal128(22, 2), child: 24(i64) } 51 | └── PhysicalScan { table: lineitem } 52 | */ 53 | 54 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q6.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | SUM(l_extendedprice * 
l_discount) AS revenue_loss 4 | FROM 5 | lineitem 6 | WHERE 7 | l_shipdate >= DATE '2023-01-01' 8 | AND l_shipdate < DATE '2024-01-01' 9 | AND l_discount BETWEEN 0.05 AND 0.07 10 | AND l_quantity < 24; 11 | desc: TPC-H Q6 12 | before: ["include_sql:schema.sql"] 13 | tasks: 14 | - explain:logical_optd,physical_optd 15 | - bench 16 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q7.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | supp_nation, 4 | cust_nation, 5 | l_year, 6 | SUM(volume) AS revenue 7 | FROM 8 | ( 9 | SELECT 10 | n1.n_name AS supp_nation, 11 | n2.n_name AS cust_nation, 12 | EXTRACT(YEAR FROM l_shipdate) AS l_year, 13 | l_extendedprice * (1 - l_discount) AS volume 14 | FROM 15 | supplier, 16 | lineitem, 17 | orders, 18 | customer, 19 | nation n1, 20 | nation n2 21 | WHERE 22 | s_suppkey = l_suppkey 23 | AND o_orderkey = l_orderkey 24 | AND c_custkey = o_custkey 25 | AND s_nationkey = n1.n_nationkey 26 | AND c_nationkey = n2.n_nationkey 27 | AND ( 28 | (n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY') 29 | OR (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE') 30 | ) 31 | AND l_shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31' 32 | ) AS shipping 33 | GROUP BY 34 | supp_nation, 35 | cust_nation, 36 | l_year 37 | ORDER BY 38 | supp_nation, 39 | cust_nation, 40 | l_year; 41 | desc: TPC-H Q7 42 | before: ["include_sql:schema.sql"] 43 | tasks: 44 | - explain:logical_optd,physical_optd 45 | - bench 46 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q8.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | o_year, 4 | sum(case 5 | when nation = 'IRAQ' then volume 6 | else 0 7 | end) / sum(volume) as mkt_share 8 | from 9 | ( 10 | select 11 | extract(year from o_orderdate) as o_year, 12 | l_extendedprice * 
(1 - l_discount) as volume, 13 | n2.n_name as nation 14 | from 15 | part, 16 | supplier, 17 | lineitem, 18 | orders, 19 | customer, 20 | nation n1, 21 | nation n2, 22 | region 23 | where 24 | p_partkey = l_partkey 25 | and s_suppkey = l_suppkey 26 | and l_orderkey = o_orderkey 27 | and o_custkey = c_custkey 28 | and c_nationkey = n1.n_nationkey 29 | and n1.n_regionkey = r_regionkey 30 | and r_name = 'AMERICA' 31 | and s_nationkey = n2.n_nationkey 32 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 33 | and p_type = 'ECONOMY ANODIZED STEEL' 34 | ) as all_nations 35 | group by 36 | o_year 37 | order by 38 | o_year; 39 | desc: TPC-H Q8 without top-most limit node 40 | before: ["include_sql:schema.sql"] 41 | tasks: 42 | - explain:logical_optd,physical_optd 43 | - bench 44 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q9.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | nation, 4 | o_year, 5 | SUM(amount) AS sum_profit 6 | FROM 7 | ( 8 | SELECT 9 | n_name AS nation, 10 | EXTRACT(YEAR FROM o_orderdate) AS o_year, 11 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount 12 | FROM 13 | part, 14 | supplier, 15 | lineitem, 16 | partsupp, 17 | orders, 18 | nation 19 | WHERE 20 | s_suppkey = l_suppkey 21 | AND ps_suppkey = l_suppkey 22 | AND ps_partkey = l_partkey 23 | AND p_partkey = l_partkey 24 | AND o_orderkey = l_orderkey 25 | AND s_nationkey = n_nationkey 26 | AND p_name LIKE '%green%' 27 | ) AS profit 28 | GROUP BY 29 | nation, 30 | o_year 31 | ORDER BY 32 | nation, 33 | o_year DESC; 34 | desc: TPC-H Q9 35 | before: ["include_sql:schema.sql"] 36 | tasks: 37 | - explain:logical_optd,physical_optd 38 | - bench 39 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/utils/memo_dump.yml: 
-------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1; 10 | desc: test self join 11 | tasks: 12 | - explain[dump_memo_table,enable_tracing]:physical_optd 13 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | stable 2 | -------------------------------------------------------------------------------- /tpch_diff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Directories holding the generated TPC-H queries to compare (sf1 vs sf0.01) 4 | dir1="optd_perfbench_workspace/tpch/genned_queries/dbPOSTGRESQL_sf1_sd15721" 5 | dir2="optd_perfbench_workspace/tpch/genned_queries/dbPOSTGRESQL_sf0.01_sd15721" 6 | 7 | # Loop through the 22 TPC-H query files 8 | for i in {1..22}; do 9 | file1="${dir1}/${i}.sql" 10 | file2="${dir2}/${i}.sql" 11 | 12 | # Check if both files exist 13 | if [[ -f "$file1" && -f "$file2" ]]; then 14 | # Compare via diff's exit status (-q/--brief): non-zero means the files differ, so the full diff output never has to be captured 15 | if ! diff -q "$file1" "$file2" > /dev/null; then 16 | echo "Difference found in file ${i}.sql" 17 | else 18 | echo "No differences in file ${i}.sql" 19 | fi 20 | else 21 | echo "File ${i}.sql does not exist in one of the directories." 22 | fi 23 | done 24 | --------------------------------------------------------------------------------