├── .github └── workflows │ ├── CI.yaml │ └── docs.yaml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── ci.sh ├── datafusion-optd-cli ├── Cargo.toml ├── Dockerfile ├── LICENSE.txt ├── README.md ├── examples │ └── cli-session-context.rs ├── src │ ├── catalog.rs │ ├── cli_context.rs │ ├── command.rs │ ├── exec.rs │ ├── functions.rs │ ├── helper.rs │ ├── highlighter.rs │ ├── lib.rs │ ├── main.rs │ ├── object_storage.rs │ ├── pool_type.rs │ ├── print_format.rs │ └── print_options.rs ├── tests │ ├── cli_integration.rs │ └── data │ │ └── sql.txt └── tpch-sf0_01 │ ├── LICENSE │ ├── README.md │ ├── customer.csv │ ├── lineitem.csv │ ├── nation.csv │ ├── orders.csv │ ├── part.csv │ ├── partsupp.csv │ ├── populate.sql │ ├── region.csv │ ├── simple_manual_test.sql │ ├── supplier.csv │ ├── tbl_to_csv.py │ └── test.sql ├── dev_scripts └── which_queries_work.sh ├── docs ├── .gitignore ├── README.md ├── book.toml ├── custom.css └── src │ ├── SUMMARY.md │ ├── cost_model.md │ ├── cost_model_benchmarking.md │ ├── datafusion.md │ ├── datafusion_cli.md │ ├── demo_three_join.md │ ├── demo_tpch_q8.md │ ├── miscellaneous.md │ ├── optd-cascades │ ├── optd-cascades-1.svg │ ├── optd-cascades-2.svg │ ├── optd-cascades-3.svg │ ├── optd-cascades-4.svg │ ├── optd-datafusion-overview.svg │ ├── optd-plan-repr-1.svg │ ├── optd-plan-repr-2.svg │ ├── optd-reopt-architecture.svg │ ├── optd-reopt-plan.svg │ ├── optd-rule-1.svg │ └── optd-rule-2.svg │ ├── optimizer.md │ ├── partial_exploration.md │ ├── plan_repr.md │ ├── properties.md │ ├── reoptimization.md │ ├── rule_engine.md │ └── sqlplannertest.md ├── optd-adaptive-demo ├── Cargo.toml └── src │ └── bin │ ├── optd-adaptive-three-join.rs │ └── optd-adaptive-tpch-q8.rs ├── optd-core ├── Cargo.toml └── src │ ├── cascades.rs │ ├── cascades │ ├── memo.rs │ ├── optimizer.rs │ ├── rule_match.rs │ └── tasks2.rs │ ├── cost.rs │ ├── heuristics.rs │ ├── heuristics │ └── optimizer.rs │ ├── lib.rs │ ├── logical_property.rs │ ├── 
nodes.rs │ ├── optimizer.rs │ ├── physical_property.rs │ ├── rules.rs │ ├── rules │ └── ir.rs │ ├── tests.rs │ └── tests │ ├── common.rs │ └── heuristics_physical_property.rs ├── optd-datafusion-bridge ├── Cargo.toml └── src │ ├── from_optd.rs │ ├── into_optd.rs │ ├── lib.rs │ └── physical_collector.rs ├── optd-datafusion-repr-adv-cost ├── Cargo.toml └── src │ ├── adv_stats.rs │ ├── adv_stats │ ├── agg.rs │ ├── filter.rs │ ├── filter │ │ ├── in_list.rs │ │ └── like.rs │ ├── join.rs │ ├── limit.rs │ └── stats.rs │ └── lib.rs ├── optd-datafusion-repr ├── Cargo.toml └── src │ ├── cost.rs │ ├── cost │ ├── adaptive_cost.rs │ └── base_cost.rs │ ├── explain.rs │ ├── lib.rs │ ├── memo_ext.rs │ ├── optimizer_ext.rs │ ├── plan_nodes.rs │ ├── plan_nodes │ ├── agg.rs │ ├── empty_relation.rs │ ├── filter.rs │ ├── join.rs │ ├── limit.rs │ ├── macros.rs │ ├── predicates.rs │ ├── predicates │ │ ├── between_pred.rs │ │ ├── bin_op_pred.rs │ │ ├── cast_pred.rs │ │ ├── column_ref_pred.rs │ │ ├── constant_pred.rs │ │ ├── data_type_pred.rs │ │ ├── extern_column_ref_pred.rs │ │ ├── func_pred.rs │ │ ├── in_list_pred.rs │ │ ├── like_pred.rs │ │ ├── list_pred.rs │ │ ├── log_op_pred.rs │ │ ├── sort_order_pred.rs │ │ └── un_op_pred.rs │ ├── projection.rs │ ├── scan.rs │ ├── sort.rs │ └── subquery.rs │ ├── properties.rs │ ├── properties │ ├── column_ref.rs │ └── schema.rs │ ├── rules.rs │ ├── rules │ ├── eliminate_duplicated_expr.rs │ ├── eliminate_limit.rs │ ├── filter.rs │ ├── filter_pushdown.rs │ ├── joins.rs │ ├── macros.rs │ ├── physical.rs │ ├── project_transpose.rs │ ├── project_transpose │ │ ├── project_filter_transpose.rs │ │ ├── project_join_transpose.rs │ │ ├── project_merge.rs │ │ └── project_transpose_common.rs │ ├── subquery.rs │ └── subquery │ │ └── depjoin_pushdown.rs │ ├── testing.rs │ ├── testing │ ├── dummy_cost.rs │ └── tpch_catalog.rs │ └── utils.rs ├── optd-gungnir ├── Cargo.toml └── src │ ├── lib.rs │ ├── stats.rs │ ├── stats │ ├── counter.rs │ ├── hyperloglog.rs │ ├── 
misragries.rs │ ├── murmur2.rs │ └── tdigest.rs │ ├── utils.rs │ └── utils │ └── arith_encoder.rs ├── optd-perfbench ├── Cargo.toml ├── src │ ├── benchmark.rs │ ├── cardbench.rs │ ├── datafusion_dbms.rs │ ├── job.rs │ ├── lib.rs │ ├── main.rs │ ├── postgres_dbms.rs │ ├── shell.rs │ ├── tpch.rs │ └── truecard.rs └── tests │ └── cardtest_integration.rs ├── optd-sqllogictest ├── Cargo.toml ├── slt │ ├── _basic_tables.slt.part │ ├── _tpch_tables.slt.part │ ├── basic.slt │ ├── tpch-q1.slt │ ├── tpch-q10.slt │ ├── tpch-q11.slt │ ├── tpch-q12.slt │ ├── tpch-q13.slt │ ├── tpch-q14.slt │ ├── tpch-q15.slt │ ├── tpch-q16.slt │ ├── tpch-q17.slt │ ├── tpch-q18.slt.disabled │ ├── tpch-q19.slt │ ├── tpch-q2.slt.disabled │ ├── tpch-q20.slt │ ├── tpch-q21.slt.disabled │ ├── tpch-q22.slt │ ├── tpch-q3.slt │ ├── tpch-q4.slt │ ├── tpch-q5.slt │ ├── tpch-q6.slt │ ├── tpch-q7.slt │ ├── tpch-q8.slt │ ├── tpch-q9.slt │ ├── unnest-agg-nulls.slt │ ├── unnest-count-star.slt │ ├── unnest-dup.slt │ ├── unnest-exists-2.slt │ ├── unnest-exists-uncor.slt │ ├── unnest-exists.slt │ ├── unnest-extern-out-of-order.slt │ ├── unnest-in-exists.slt │ ├── unnest-in-uncor.slt │ ├── unnest-in.slt │ └── unnest-not-in-uncor.slt ├── src │ └── lib.rs └── tests │ └── harness.rs ├── optd-sqlplannertest ├── Cargo.toml ├── README.md ├── benches │ └── planner_bench.rs ├── src │ ├── bench_helper.rs │ ├── bench_helper │ │ ├── execution.rs │ │ └── planning.rs │ ├── bin │ │ └── planner_test_apply.rs │ └── lib.rs └── tests │ ├── basic │ ├── basic_nodes.planner.sql │ ├── basic_nodes.yml │ ├── constant_predicate.planner.sql │ ├── constant_predicate.yml │ ├── cross_product.planner.sql │ ├── cross_product.yml │ ├── eliminate_duplicated_expr.planner.sql │ ├── eliminate_duplicated_expr.yml │ ├── eliminate_limit.planner.sql │ ├── eliminate_limit.yml │ ├── eliminate_proj.planner.sql │ ├── eliminate_proj.yml │ ├── empty_relation.planner.sql │ ├── empty_relation.yml │ ├── filter.planner.sql │ ├── filter.yml │ ├── 
verbose.planner.sql │ └── verbose.yml │ ├── expressions │ ├── redundant_exprs.planner.sql │ └── redundant_exprs.yml │ ├── joins │ ├── join_enumerate.planner.sql │ ├── join_enumerate.yml │ ├── multi-join.planner.sql │ ├── multi-join.yml │ ├── self-join.planner.sql │ └── self-join.yml │ ├── planner_test.rs │ ├── pushdowns │ ├── fliter_transpose.planner.sql │ └── fliter_transpose.yml │ ├── subqueries │ ├── subquery_unnesting.planner.sql │ └── subquery_unnesting.yml │ ├── tpch │ ├── bench_populate.sql │ ├── q1.planner.sql │ ├── q1.yml │ ├── q10.planner.sql │ ├── q10.yml │ ├── q11.planner.sql │ ├── q11.yml │ ├── q12.planner.sql │ ├── q12.yml │ ├── q13.planner.sql │ ├── q13.yml │ ├── q14.planner.sql │ ├── q14.yml │ ├── q15.planner.sql │ ├── q15.yml │ ├── q16.planner.sql │ ├── q16.yml │ ├── q17.planner.sql │ ├── q17.yml │ ├── q18.yml.disabled │ ├── q19.planner.sql │ ├── q19.yml │ ├── q2.planner.sql │ ├── q2.yml │ ├── q20.planner.sql │ ├── q20.yml │ ├── q21.yml.disabled │ ├── q22.planner.sql │ ├── q22.yml │ ├── q3.planner.sql │ ├── q3.yml │ ├── q4.planner.sql │ ├── q4.yml │ ├── q5.planner.sql │ ├── q5.yml │ ├── q6.planner.sql │ ├── q6.yml │ ├── q7.planner.sql │ ├── q7.yml │ ├── q8.planner.sql │ ├── q8.yml │ ├── q9.planner.sql │ ├── q9.yml │ └── schema.sql │ └── utils │ ├── memo_dump.planner.sql │ └── memo_dump.yml ├── rust-toolchain └── tpch_diff.sh /.github/workflows/CI.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | env: 12 | CARGO_TERM_COLOR: always 13 | 14 | jobs: 15 | check: 16 | runs-on: ubuntu-latest 17 | 18 | services: 19 | postgres: 20 | image: postgres:15 21 | env: 22 | POSTGRES_USER: test_user 23 | POSTGRES_PASSWORD: password 24 | POSTGRES_DB: postgres 25 | ports: 26 | - 5432:5432 27 | options: >- 28 | --health-cmd pg_isready 29 | --health-interval 10s 30 | --health-timeout 5s 31 | --health-retries 5 32 | 33 | 
steps: 34 | - uses: actions/checkout@v2 35 | - uses: actions-rs/toolchain@v1 36 | with: 37 | profile: minimal 38 | components: rustfmt, clippy 39 | - name: Check code format 40 | uses: actions-rs/cargo@v1 41 | with: 42 | command: fmt 43 | args: --all -- --check 44 | - name: Clippy 45 | uses: actions-rs/cargo@v1 46 | with: 47 | command: clippy 48 | args: --workspace --all-targets --all-features --locked -- -D warnings 49 | - uses: taiki-e/install-action@nextest 50 | - name: Test 51 | uses: actions-rs/cargo@v1 52 | with: 53 | command: nextest 54 | args: run --no-fail-fast --workspace --all-features --locked 55 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: mdbook gh pages 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | workflow_dispatch: 8 | 9 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 16 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 
17 | concurrency: 18 | group: "pages" 19 | cancel-in-progress: false 20 | 21 | jobs: 22 | build: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: setup mdbook 27 | uses: peaceiris/actions-mdbook@v1 28 | with: 29 | mdbook-version: 'latest' 30 | - run: cd docs && mdbook build 31 | - name: upload dist 32 | uses: actions/upload-pages-artifact@v3 33 | with: 34 | path: docs/book/ 35 | deploy: 36 | runs-on: ubuntu-latest 37 | # Add a dependency to the build job 38 | needs: build 39 | # Deploy to the github-pages environment 40 | environment: 41 | name: github-pages 42 | url: ${{ steps.deployment.outputs.page_url }} 43 | steps: 44 | - name: deploy to gh pages 45 | id: deployment 46 | uses: actions/deploy-pages@v4 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.vscode 3 | /.DS_Store 4 | /.idea 5 | .history 6 | **/*_workspace/ -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "datafusion-optd-cli", 4 | "optd-core", 5 | "optd-datafusion-bridge", 6 | "optd-datafusion-repr", 7 | "optd-sqlplannertest", 8 | "optd-adaptive-demo", 9 | "optd-gungnir", 10 | "optd-perfbench", 11 | "optd-datafusion-repr-adv-cost", 12 | "optd-sqllogictest", 13 | ] 14 | resolver = "2" 15 | 16 | [workspace.package] 17 | version = "0.1.1" 18 | edition = "2021" 19 | homepage = "https://github.com/cmu-db/optd" 20 | keywords = ["sql", "database", "optimizer", "datafusion"] 21 | license = "MIT" 22 | repository = "https://github.com/cmu-db/optd" 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023-2024 CMU 
Database Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ci.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # runs the stuff in CI.yaml locally 3 | # unfortunately this needs to be updated manually. 
just update it if you get annoyed by GHAs failing 4 | 5 | set -e 6 | 7 | cargo fmt --all -- --check 8 | cargo clippy --workspace --all-targets --all-features --locked -- -D warnings 9 | cargo test --no-fail-fast --workspace --all-features --locked 10 | 11 | # %s is a workaround because printing --- doesn"t work in some shells 12 | # this just makes it more obvious when the CI has passed 13 | printf "%s\n| \033[32m\033[1mCI PASSED\033[0m |\n%s\n" "-------------" "-------------" -------------------------------------------------------------------------------- /datafusion-optd-cli/Cargo.toml: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | [package] 19 | name = "datafusion-optd-cli" 20 | description = "Command Line Client for DataFusion query engine." 
21 | version = "43.0.0" 22 | authors = ["Apache DataFusion "] 23 | edition = "2021" 24 | keywords = ["arrow", "datafusion", "query", "sql"] 25 | license = "Apache-2.0" 26 | homepage = "https://github.com/cmu-db/optd" 27 | repository = "https://github.com/cmu-db/optd" 28 | # Specify MSRV here as `cargo msrv` doesn't support workspace version 29 | rust-version = "1.79" 30 | readme = "README.md" 31 | 32 | [dependencies] 33 | arrow = { version = "53.0.0" } 34 | async-trait = "0.1.73" 35 | aws-config = "1.5.5" 36 | aws-sdk-sso = "1.43.0" 37 | aws-sdk-ssooidc = "1.44.0" 38 | aws-sdk-sts = "1.43.0" 39 | # end pin aws-sdk crates 40 | aws-credential-types = "1.2.0" 41 | clap = { version = "4.5.16", features = ["derive", "cargo"] } 42 | datafusion = { version = "43.0.0", features = [ 43 | "avro", 44 | "crypto_expressions", 45 | "datetime_expressions", 46 | "encoding_expressions", 47 | "parquet", 48 | "regex_expressions", 49 | "unicode_expressions", 50 | "compression", 51 | ] } 52 | dirs = "5.0.1" 53 | env_logger = "0.11" 54 | futures = "0.3" 55 | mimalloc = { version = "0.1", default-features = false } 56 | object_store = { version = "0.11.0", features = ["aws", "gcp", "http"] } 57 | parking_lot = { version = "0.12" } 58 | parquet = { version = "53.0.0", default-features = false } 59 | regex = "1.8" 60 | rustyline = "14.0" 61 | tokio = { version = "1.24", features = [ 62 | "macros", 63 | "rt", 64 | "rt-multi-thread", 65 | "sync", 66 | "parking_lot", 67 | "signal", 68 | ] } 69 | url = "2.2" 70 | # begin optd-cli patch 71 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } 72 | # end optd-cli patch 73 | 74 | [dev-dependencies] 75 | assert_cmd = "2.0" 76 | ctor = "0.2.0" 77 | predicates = "3.0" 78 | rstest = "0.22" 79 | -------------------------------------------------------------------------------- /datafusion-optd-cli/Dockerfile: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache 
Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | FROM rust:1.79-bookworm AS builder 19 | 20 | COPY . /usr/src/datafusion 21 | COPY ./datafusion /usr/src/datafusion/datafusion 22 | COPY ./datafusion-cli /usr/src/datafusion/datafusion-cli 23 | 24 | WORKDIR /usr/src/datafusion/datafusion-cli 25 | 26 | RUN rustup component add rustfmt 27 | 28 | RUN cargo build --release 29 | 30 | FROM debian:bookworm-slim 31 | 32 | COPY --from=builder /usr/src/datafusion/datafusion-cli/target/release/datafusion-cli /usr/local/bin 33 | 34 | RUN mkdir /data 35 | 36 | ENTRYPOINT ["datafusion-cli"] 37 | 38 | CMD ["--data-path", "/data"] 39 | -------------------------------------------------------------------------------- /datafusion-optd-cli/README.md: -------------------------------------------------------------------------------- 1 | 19 | 20 | 21 | 22 | # DataFusion Command-line Interface 23 | 24 | [DataFusion](https://datafusion.apache.org/) is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. 25 | 26 | DataFusion CLI (`datafusion-cli`) is a small command line utility that runs SQL queries using the DataFusion engine. 
27 | 28 | # Frequently Asked Questions 29 | 30 | ## Where can I find more information? 31 | 32 | See the [`datafusion-cli` documentation](https://datafusion.apache.org/user-guide/cli/index.html) for further information. 33 | 34 | ## How do I make my IDE work with `datafusion-cli`? 35 | 36 | "open" the `datafusion/datafusion-cli` project as its own top level 37 | project in my IDE (rather than opening `datafusion`) 38 | 39 | The reason `datafusion-cli` is not part of the main workspace in 40 | [`datafusion Cargo.toml`] file is that `datafusion-cli` is a binary and has a 41 | checked in `Cargo.lock` file to ensure reproducible builds. 42 | 43 | However, the `datafusion` and sub crates are intended for use as libraries and 44 | thus do not have a `Cargo.lock` file checked in. 45 | 46 | [`datafusion cargo.toml`]: https://github.com/apache/datafusion/blob/main/Cargo.toml 47 | -------------------------------------------------------------------------------- /datafusion-optd-cli/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | #![doc = include_str!("../README.md")] 19 | pub const DATAFUSION_CLI_VERSION: &str = env!("CARGO_PKG_VERSION"); 20 | 21 | pub mod catalog; 22 | pub mod cli_context; 23 | pub mod command; 24 | pub mod exec; 25 | pub mod functions; 26 | pub mod helper; 27 | pub mod highlighter; 28 | pub mod object_storage; 29 | pub mod pool_type; 30 | pub mod print_format; 31 | pub mod print_options; 32 | -------------------------------------------------------------------------------- /datafusion-optd-cli/src/pool_type.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::{ 19 | fmt::{self, Display, Formatter}, 20 | str::FromStr, 21 | }; 22 | 23 | #[derive(PartialEq, Debug, Clone)] 24 | pub enum PoolType { 25 | Greedy, 26 | Fair, 27 | } 28 | 29 | impl FromStr for PoolType { 30 | type Err = String; 31 | 32 | fn from_str(s: &str) -> Result { 33 | match s { 34 | "Greedy" | "greedy" => Ok(PoolType::Greedy), 35 | "Fair" | "fair" => Ok(PoolType::Fair), 36 | _ => Err(format!("Invalid memory pool type '{}'", s)), 37 | } 38 | } 39 | } 40 | 41 | impl Display for PoolType { 42 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 43 | match self { 44 | PoolType::Greedy => write!(f, "greedy"), 45 | PoolType::Fair => write!(f, "fair"), 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /datafusion-optd-cli/tests/cli_integration.rs: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | use std::process::{Command, Stdio}; 19 | 20 | use assert_cmd::prelude::CommandCargoExt; 21 | 22 | #[cfg(test)] 23 | #[ctor::ctor] 24 | fn init() { 25 | // Enable RUST_LOG logging configuration for tests 26 | let _ = env_logger::try_init(); 27 | } 28 | 29 | // TODO: fix these later. They're commented out since they were broken when we first received the codebase. 30 | // #[rstest] 31 | // #[case::exec_from_commands( 32 | // ["--command", "select 1", "--format", "json", "-q"], 33 | // "[{\"Int64(1)\":1}]\n" 34 | // )] 35 | // #[case::exec_multiple_statements( 36 | // ["--command", "select 1; select 2;", "--format", "json", "-q"], 37 | // "[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" 38 | // )] 39 | // #[case::exec_from_files( 40 | // ["--file", "tests/data/sql.txt", "--format", "json", "-q"], 41 | // "[{\"Int64(1)\":1}]\n" 42 | // )] 43 | // #[case::set_batch_size( 44 | // ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], 45 | // "[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" 46 | // )] 47 | // #[test] 48 | // fn cli_quick_test<'a>(#[case] args: impl IntoIterator, #[case] expected: &str) { 49 | // let mut cmd = Command::cargo_bin("datafusion-optd-cli").unwrap(); 50 | // cmd.args(args); 51 | // cmd.assert().stdout(predicate::eq(expected)); 52 | // } 53 | 54 | #[test] 55 | fn cli_test_tpch() { 56 | let mut cmd = Command::cargo_bin("datafusion-optd-cli").unwrap(); 57 | cmd.current_dir(".."); // all paths in `test.sql` assume we're in the base dir of the repo 58 | cmd.args(["--file", "datafusion-optd-cli/tpch-sf0_01/test.sql"]); 59 | cmd.stdout(Stdio::piped()).stderr(Stdio::piped()); 60 | let status = cmd.status().unwrap(); 61 | assert!( 62 | status.success(), 63 | "should not have crashed when running tpch" 64 | ); 65 | } 66 | -------------------------------------------------------------------------------- /datafusion-optd-cli/tests/data/sql.txt: 
-------------------------------------------------------------------------------- 1 | select 1; -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/README.md: -------------------------------------------------------------------------------- 1 | ## Generate Data 2 | 3 | ``` 4 | git clone https://github.com/electrum/tpch-dbgen.git 5 | cd tpch-dbgen 6 | make 7 | ./dbgen -s 0.001 8 | mv *.tbl 9 | ``` 10 | 11 | This directory is copy-pasted from the RisingLight project. 12 | -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/nation.csv: -------------------------------------------------------------------------------- 1 | 0|ALGERIA|0| haggle. carefully final deposits detect slyly agai 2 | 1|ARGENTINA|1|al foxes promise slyly according to the regular accounts. bold requests alon 3 | 2|BRAZIL|1|y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special 4 | 3|CANADA|1|eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold 5 | 4|EGYPT|4|y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d 6 | 5|ETHIOPIA|0|ven packages wake quickly. regu 7 | 6|FRANCE|3|refully final requests. regular, ironi 8 | 7|GERMANY|3|l platelets. regular accounts x-ray: unusual, regular acco 9 | 8|INDIA|2|ss excuses cajole slyly across the packages. deposits print aroun 10 | 9|INDONESIA|2| slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull 11 | 10|IRAN|4|efully alongside of the slyly final dependencies. 12 | 11|IRAQ|4|nic deposits boost atop the quickly final requests? quickly regula 13 | 12|JAPAN|2|ously. final, express gifts cajole a 14 | 13|JORDAN|4|ic deposits are blithely about the carefully regular pa 15 | 14|KENYA|0| pending excuses haggle furiously deposits. 
pending, express pinto beans wake fluffily past t 16 | 15|MOROCCO|0|rns. blithely bold courts among the closely regular packages use furiously bold platelets? 17 | 16|MOZAMBIQUE|0|s. ironic, unusual asymptotes wake blithely r 18 | 17|PERU|1|platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun 19 | 18|CHINA|2|c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos 20 | 19|ROMANIA|3|ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account 21 | 20|SAUDI ARABIA|4|ts. silent requests haggle. closely express packages sleep across the blithely 22 | 21|VIETNAM|2|hely enticingly express accounts. even, final 23 | 22|RUSSIA|3| requests against the platelets use never according to the quickly regular pint 24 | 23|UNITED KINGDOM|3|eans boost carefully special requests. accounts are. carefull 25 | 24|UNITED STATES|1|y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/region.csv: -------------------------------------------------------------------------------- 1 | 0|AFRICA|lar deposits. blithely final packages cajole. regular waters are final requests. regular accounts are according to 2 | 1|AMERICA|hs use ironic, even requests. s 3 | 2|ASIA|ges. thinly even pinto beans ca 4 | 3|EUROPE|ly final courts cajole furiously final excuse 5 | 4|MIDDLE EAST|uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/simple_manual_test.sql: -------------------------------------------------------------------------------- 1 | -- This is just used if you want to run really simple manual tests on the CLI. 
Feel free to delete the whole thing and write your own manual tests 2 | -- Command: `cargo run --bin datafusion-optd-cli -- --enable-df-logical -f datafusion-optd-cli/tpch-sf0_01/simple_manual_test.sql` 3 | CREATE TABLE NATION ( 4 | N_NATIONKEY INT NOT NULL, 5 | N_NAME CHAR(25) NOT NULL, 6 | N_REGIONKEY INT NOT NULL, 7 | N_COMMENT VARCHAR(152) 8 | ); 9 | 10 | CREATE EXTERNAL TABLE nation_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION 'datafusion-optd-cli/tpch-sf0_01/nation.csv'; 11 | insert into nation select column_1, column_2, column_3, column_4 from nation_tbl; 12 | 13 | SELECT * FROM nation where nation.n_nationkey = 1 OR nation.n_nationkey = 2 OR nation.n_nationkey = 5; 14 | -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/supplier.csv: -------------------------------------------------------------------------------- 1 | 1|Supplier#000000001| N kD4on9OM Ipw3,gf0JBoQDd7tgrzrddZ|17|27-918-335-1736|5755.94|each slyly above the careful 2 | 2|Supplier#000000002|89eJ5ksX3ImxJQBvxObC,|5|15-679-861-2259|4032.68| slyly bold instructions. idle dependen 3 | 3|Supplier#000000003|q1,G3Pj6OjIuUYfUoH18BFTKP5aU9bEV3|1|11-383-516-1199|4192.40|blithely silent requests after the express dependencies are sl 4 | 4|Supplier#000000004|Bk7ah4CK8SYQTepEmvMkkgMwg|15|25-843-787-7479|4641.08|riously even requests above the exp 5 | 5|Supplier#000000005|Gcdm2rJRzl5qlTVzc|11|21-151-690-3663|-283.84|. slyly regular pinto bea 6 | 6|Supplier#000000006|tQxuVm7s7CnK|14|24-696-997-4969|1365.79|final accounts. regular dolphins use against the furiously ironic decoys. 7 | 7|Supplier#000000007|s,4TicNGB4uO6PaSqNBUq|23|33-990-965-2201|6820.35|s unwind silently furiously regular courts. final requests are deposits. requests wake quietly blit 8 | 8|Supplier#000000008|9Sq4bBH2FQEmaFOocY45sRTxo6yuoG|17|27-498-742-3860|7627.85|al pinto beans. 
asymptotes haggl 9 | 9|Supplier#000000009|1KhUgZegwM3ua7dsYmekYBsK|10|20-403-398-8662|5302.37|s. unusual, even requests along the furiously regular pac 10 | 10|Supplier#000000010|Saygah3gYWMp72i PY|24|34-852-489-8585|3891.91|ing waters. regular requests ar -------------------------------------------------------------------------------- /datafusion-optd-cli/tpch-sf0_01/tbl_to_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | def tbl_to_csv(file): 5 | lines = [] 6 | for line in Path(file).read_text().splitlines(): 7 | # Replace the delimiter `|` with `,` 8 | line = line.strip('|') 9 | lines.append(line) 10 | # Write the converted content to a new `.csv` file 11 | Path(file.replace('.tbl', '.csv')).write_text('\n'.join(lines)) 12 | 13 | def main(): 14 | # Find all files end with `.tbl` in the current directory 15 | # and convert them to `.csv` files. 16 | for file in os.listdir('.'): 17 | if file.endswith('.tbl'): 18 | tbl_to_csv(file) 19 | 20 | 21 | if __name__ == '__main__': 22 | main() 23 | -------------------------------------------------------------------------------- /dev_scripts/which_queries_work.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | benchmark_name=$1 3 | USAGE="Usage: $0 [job|joblight|tpch]" 4 | 5 | if [ $# -ne 1 ]; then 6 | echo >&2 $USAGE 7 | exit 1 8 | fi 9 | 10 | if [[ "$benchmark_name" == "job" ]]; then 11 | all_ids="1a,1b,1c,1d,2a,2b,2c,2d,3a,3b,3c,4a,4b,4c,5a,5b,5c,6a,6b,6c,6d,6e,6f,7a,7b,7c,8a,8b,8c,8d,9a,9b,9c,9d,10a,10b,10c,11a,11b,11c,11d,12a,12b,12c,13a,13b,13c,13d,14a,14b,14c,15a,15b,15c,15d,16a,16b,16c,16d,17a,17b,17c,17d,17e,17f,18a,18b,18c,19a,19b,19c,19d,20a,20b,20c,21a,21b,21c,22a,22b,22c,22d,23a,23b,23c,24a,24b,25a,25b,25c,26a,26b,26c,27a,27b,27c,28a,28b,28c,29a,29b,29c,30a,30b,30c,31a,31b,31c,32a,32b,33a,33b,33c" 12 | vec_var_name="WORKING_JOB_QUERY_IDS" 13 | elif [[ "$benchmark_name" 
== "joblight" ]]; then 14 | all_ids="1a,1b,1c,1d,2a,3a,3b,3c,4a,4b,4c,5a,5b,5c,6a,6b,6c,6d,6e,7a,7b,7c,8a,8b,8c,9a,9b,10a,10b,10c,11a,11b,11c,12a,12b,12c,13a,14a,14b,14c,15a,15b,15c,16a,17a,17b,17c,18a,18b,18c,19a,19b,20a,20b,20c,21a,21b,22a,22b,22c,23a,23b,24a,24b,25a,26a,26b,27a,27b,28a" 15 | vec_var_name="WORKING_JOBLIGHT_QUERY_IDS" 16 | elif [[ "$benchmark_name" == "tpch" ]]; then 17 | all_ids="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22" 18 | vec_var_name="WORKING_QUERY_IDS" 19 | else 20 | echo >&2 $USAGE 21 | exit 1 22 | fi 23 | 24 | successful_ids=() 25 | IFS=',' 26 | for id in $all_ids; do 27 | cargo run --release --bin optd-perfbench cardbench $benchmark_name --query-ids $id &>/dev/null 28 | 29 | if [ $? -eq 0 ]; then 30 | echo >&2 $id succeeded 31 | successful_ids+=("$id") 32 | else 33 | echo >&2 $id failed 34 | fi 35 | done 36 | 37 | echo >&2 38 | echo " Useful Outputs" 39 | echo "================" 40 | working_query_ids_vec="pub const ${vec_var_name}: &[&str] = &[\"${successful_ids[0]}\"" 41 | IFS=" " 42 | for id in "${successful_ids[@]:1}"; do 43 | working_query_ids_vec+=", \"$id\"" 44 | done 45 | working_query_ids_vec+="]" 46 | echo "${working_query_ids_vec}" 47 | IFS="," 48 | echo "--query-ids ${successful_ids[*]}" 49 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | book 2 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # optd Development Documentation 2 | 3 | The docs is written in `mdbook` format. You can follow the [`mdbook` installation guide](https://rust-lang.github.io/mdBook/guide/installation.html) to set up the environment. 
After installing mdbook, you can use the following command to start the docs server: 4 | 5 | ```shell 6 | mdbook serve 7 | ``` 8 | 9 | The online version of the documentation can be found at [https://cmu-db.github.io/optd/](https://cmu-db.github.io/optd/). 10 | -------------------------------------------------------------------------------- /docs/book.toml: -------------------------------------------------------------------------------- 1 | [book] 2 | authors = ["Alex Chi"] 3 | language = "en" 4 | multilingual = false 5 | src = "src" 6 | title = "the optd book" 7 | 8 | [output.html] 9 | additional-css = ["custom.css"] 10 | -------------------------------------------------------------------------------- /docs/custom.css: -------------------------------------------------------------------------------- 1 | .content img { 2 | margin-left: auto; 3 | margin-right: auto; 4 | display: block; 5 | } 6 | -------------------------------------------------------------------------------- /docs/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | # optd book 4 | 5 | [Intro to optd]() 6 | - [The Core]() 7 | - [Plan Representation]() 8 | - [Memo Table and Logical Equivalence]() 9 | - [Cascades Framework]() 10 | - [Basic Cascades Tasks]() 11 | - [Cycle Avoidance]() 12 | - [Upper Bound Pruning]() 13 | - [Multi-Stage Optimization]() 14 | - [Rule IR and Matcher]() 15 | - [Cost and Statistics]() 16 | - [Logical Properties]() 17 | - [Physical Properties and Enforcers]() 18 | - [Memo Table: Subgoals and Winners]() 19 | - [Cascades Tasks: Required Physical Properties]() 20 | - [Exploration Budget]() 21 | - [Heuristics Optimizer]() 22 | - [Integration with Datafusion]() 23 | - [Datafusion Plan Representation]() 24 | - [Datafusion Bridge]() 25 | - [Rule Engine and Rules]() 26 | - [Basic Cost Model]() 27 | - [Logical and Physical Properties]() 28 | - [Optimization Passes]() 29 | - [Miscellaneous]() 30 | - [Explain]() 31 | - 
[Research]() 32 | - [Partial Exploration and Re-Optimization]() 33 | - [Advanced Cost Model]() 34 | - [The Hyper Subquery Unnesting Ruleset]() 35 | - [Testing and Benchmark]() 36 | - [sqlplannertest]() 37 | - [sqllogictest]() 38 | - [perfbench]() 39 | - [Debugging and Tracing]() 40 | - [optd-core Tracing]() 41 | - [Memo Table Visualization]() 42 | - [Optimizer Dump]() 43 | - [Contribution Guide]() 44 | - [Install Tools]() 45 | - [Contribution Workflow]() 46 | - [Add a Datafusion Rule]() 47 | - [What's Next]() 48 | - [Ideas]() 49 | - [RFCs]() 50 | --- 51 | 52 | # DEPRECATED 53 | - [old optd book]() 54 | - [Core Framework]() 55 | - [Optimizer](./optimizer.md) 56 | - [Plan Representation](./plan_repr.md) 57 | - [Rule Engine](./rule_engine.md) 58 | - [Cost Model](./cost_model.md) 59 | - [Properties](./properties.md) 60 | - [Integration]() 61 | - [Apache Arrow Datafusion](./datafusion.md) 62 | - [Adaptive Optimization]() 63 | - [Re-optimization](./reoptimization.md) 64 | - [Partial Exploration](./partial_exploration.md) 65 | - [Demo]() 66 | - [Three Join Demo](./demo_three_join.md) 67 | - [TPC-H Q8 Demo](./demo_tpch_q8.md) 68 | - [Performance Benchmarking]() 69 | - [Cost Model Cardinality Benchmarking](./cost_model_benchmarking.md) 70 | - [Functional Testing]() 71 | - [SQLPlannerTest](./sqlplannertest.md) 72 | - [Datafusion CLI](./datafusion_cli.md) 73 | - [Miscellaneous](./miscellaneous.md) 74 | -------------------------------------------------------------------------------- /docs/src/cost_model.md: -------------------------------------------------------------------------------- 1 | # Cost Model 2 | 3 | Developers can plug their own cost models into optd. The cost must be represented as a vector of `f64`s, where the first element in the vector is the weighted cost. The optimizer will use weighted cost internally for cost comparison and select the winner for a group. 
4 | 5 | The cost model interface can be found in `optd-core/src/cost.rs`, and the core of the cost model is the cost computation process implemented in `CostModel::compute_cost`. 6 | 7 | ```rust 8 | pub trait CostModel: 'static + Send + Sync { 9 | fn compute_cost( 10 | &self, 11 | node: &T, 12 | data: &Option, 13 | children: &[Cost], 14 | context: RelNodeContext, 15 | ) -> Cost; 16 | } 17 | ``` 18 | 19 | `compute_cost` takes the cost of the children, the current plan node information, and some contexts of the current node. The context will be useful for adaptive optimization, and it contains the group ID and the expression ID of the current plan node, so that the adaptive cost model can use runtime information from the last run to compute the cost. 20 | 21 | The optd Datafusion cost model stores 4 elements in the cost vector: weighted cost, row count, compute cost and I/O cost. The cost of the plan nodes and the SQL expressions can all be computed solely based on these information. 22 | 23 | Contrary to other optimizer frameworks like Calcite, optd does not choose to implement the cost model as part of the plan node member functions. In optd, developers write all cost computation things in one file, so that testing and debugging the cost model all happens in one file (or in one `impl`). 24 | -------------------------------------------------------------------------------- /docs/src/datafusion_cli.md: -------------------------------------------------------------------------------- 1 | # Datafusion CLI 2 | 3 | Developers can interact with optd by using the Datafusion cli. The cli supports creating tables, populating data, and executing ANSI SQL queries. 4 | 5 | ```shell 6 | cargo run --bin datafusion-optd-cli 7 | ``` 8 | 9 | We also have a scale 0.01 TPC-H dataset to test. The test SQL can be executed with the Datafusion cli. 
10 | 11 | ```shell 12 | cargo run --bin datafusion-optd-cli -- -f datafusion-optd-cli/tpch-sf0_01/test.sql 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/src/demo_three_join.md: -------------------------------------------------------------------------------- 1 | # Three Join Demo 2 | 3 | You can run this demo with the following command: 4 | 5 | ```shell 6 | cargo run --release --bin optd-adaptive-three-join 7 | ``` 8 | 9 | We create 3 tables and join them. The underlying data are getting updated every time the query is executed. 10 | 11 | ```sql 12 | select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2; 13 | ``` 14 | 15 | When the data distribution and the table size changes, the optimal join order will be different. The output of this demo is as below. 16 | 17 | ```plain 18 | Iter 66: (HashJoin (HashJoin t1 t2) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 66/66=100.000 19 | Iter 67: (HashJoin (HashJoin t2 t1) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 66/67=98.507 20 | Iter 68: (HashJoin t2 (HashJoin t1 t3)) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 66/68=97.059 21 | Iter 69: (HashJoin (HashJoin t1 t2) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 67/69=97.101 22 | Iter 70: (HashJoin (HashJoin t1 t2) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 68/70=97.143 23 | Iter 71: (HashJoin (HashJoin t1 t2) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 69/71=97.183 24 | Iter 72: (HashJoin (HashJoin t2 t1) t3) <-> (best) (HashJoin (HashJoin t1 t2) t3), Accuracy: 69/72=95.833 25 | ``` 26 | 27 | The left plan Lisp representation is the join order determined by the adaptive query optimization algorithm. The right plan is the best plan. The accuracy is the percentage of executions that the adaptive query optimization algorithm generates the best cost-optimal plan. 
28 | 29 | To find the optimal plan and compute the accuracy, we set up two optimizers in this demo: the normal optimizer and the optimal optimizer. Each time we insert some data into the tables, we will invoke the normal optimizer once, and invoke the optimal optimizer with all possible combination of join orders, so that the optimal optimizer can produce an optimal plan based on the cost model and the join selectivity. 30 | 31 | As the algorithm can only know the runtime information from last run before new data are added into the tables, there will be some iterations where it cannot generate the optimal plan. But it will converge to the optimal plan as more runtime information is collected. 32 | -------------------------------------------------------------------------------- /docs/src/demo_tpch_q8.md: -------------------------------------------------------------------------------- 1 | # TPC-H Q8 Demo 2 | 3 | 4 | You can run this demo with the following command: 5 | 6 | ```shell 7 | cargo run --release --bin optd-adaptive-tpch-q8 8 | ``` 9 | 10 | In this demo, we create the TPC-H schema with test data of scale 0.01. There are 8 tables in TPC-H Q8, and it is impossible to enumerate all join combinations in one run. The demo will run this query multiple times, each time exploring a subset of the plan space. Therefore, optimization will be fast for each iteration, and as the plan space is more explored in each iteration, the produced plan will converge to the optimal join order. 11 | 12 | ```plain 13 | --- ITERATION 5 --- 14 | plan space size budget used, not applying logical rules any more. current plan space: 10354 15 | (HashJoin region (HashJoin (HashJoin (HashJoin (HashJoin (HashJoin part (HashJoin supplier lineitem)) orders) customer) nation) nation)) 16 | plan space size budget used, not applying logical rules any more. 
current plan space: 11743 17 | +--------+------------+ 18 | | col0 | col1 | 19 | +--------+------------+ 20 | | 1995.0 | 1.00000000 | 21 | | 1996.0 | 0.32989690 | 22 | +--------+------------+ 23 | 2 rows in set. Query took 0.115 seconds. 24 | ``` 25 | 26 | The output contains the current join order in Lisp representation, the plan space, and the query result. 27 | -------------------------------------------------------------------------------- /docs/src/miscellaneous.md: -------------------------------------------------------------------------------- 1 | # Miscellaneous 2 | 3 | This is a note covering things that do not work well in the system right now. 4 | 5 | ## Type System 6 | 7 | Currently, we hard code decimal type to have `15, 2` precision. Type inferences should be done in the schema property inference. 8 | 9 | ## Expression 10 | 11 | optd supports exploring SQL expressions in the optimization process. However, this might be super inefficient as optimizing a plan node (i.e., join to hash join) usually needs the full binding of an expression tree. This could have exponential plan space and is super inefficient. 12 | 13 | ## Bindings 14 | 15 | We do not have something like a binding iterator as in the Cascades paper. Before applying a rule, we will generate all bindings of a group, which might take a lot of memory. This should be fixed in the future. 16 | 17 | ## Cycle Detection + DAG 18 | 19 | Consider the case for join commute rule. 20 | 21 | ``` 22 | (Join A B) <- group 1 23 | (Projection (Join B A) ) <- group 2 24 | (Projection (Projection (Join A B) ) ) <- group 1 may refer itself 25 | ``` 26 | 27 | After applying the rule twice, the memo table will have self-referential groups. Currently, we detect such self-referential things in optimize group task. Probably there will be better ways to do that. 28 | 29 | The same applies to DAG / Recursive CTEs -- we did not test if the framework works with DAG but in theory it should support it. 
We just need to ensure a node in DAG does not get searched twice. 30 | 31 | # DAG 32 | 33 | For DAG, another challenge is to recover the reusable fragments from the optimizer output. The optimizer can give you a DAG output but by iterating through the plan, you cannot know which parts can be reused/materialized. Therefore, we might need to produce some extra information with the plan node output. i.e., a graph-representation with metadata of each node, instead of `RelNode`. This also helps the process of inserting the physical collector plan nodes, which is currently a little bit hacky in the implementation. 34 | 35 | ## Memo Table 36 | 37 | Obviously, it is not efficient to simply store a mapping from RelNode to the expression id. Cannot imagine how many levels of depths will it require to compute a hash of a tree structure. 38 | 39 | ## Partial Exploration 40 | 41 | Each iteration will only be slower because we have to invoke the optimize group tasks before we can find a group to apply the rule. Probably we can keep the task stack across runs to make it faster. 42 | 43 | ## Physical Property + Enforcer Rules 44 | 45 | A major missing feature in the optimizer. Need this to support shuffling and sort optimizations. 46 | 47 | ## Pruning 48 | 49 | Currently, we have implemented the pruning condition as in the paper, but we did not actually enable it. 50 | -------------------------------------------------------------------------------- /docs/src/partial_exploration.md: -------------------------------------------------------------------------------- 1 | # Partial Exploration 2 | 3 | When the plan space is very large, optd will generate a sub-optimal plan at first, and then use the runtime information to continue the plan space search next time the same query (or a similar query) is being optimized. This is partial exploration. 
4 | 5 | Developers can pass `partial_explore_iter` and `partial_explore_space` to the optimizer options to specify how large the optimizer will expand each time `step_optimize_rel` is invoked. To use partial exploration, developers should not clear the internal state of the optimizer across different runs. 6 | -------------------------------------------------------------------------------- /docs/src/properties.md: -------------------------------------------------------------------------------- 1 | # Properties 2 | 3 | In optd, properties are defined by implementing the `PropertyBuilder` trait in `optd-core/src/property.rs`. Properties will be automatically inferred when plan nodes are added to the memo table. When initializing an optimizer instance, developers will need to provide a vector of properties the optimizer will need to compute throughout the optimization process. 4 | 5 | ## Define a Property 6 | 7 | Currently, optd only supports logical properties. It cannot optimize a query plan with required physical properties for now. An example of property definition is the Datafusion representation's plan node schema, as in `optd-datafusion-repr/src/properties/schema.rs`. 8 | 9 | 10 | ```rust 11 | impl PropertyBuilder for SchemaPropertyBuilder { 12 | type Prop = Schema; 13 | 14 | fn derive( 15 | &self, 16 | typ: OptRelNodeTyp, 17 | data: Option, 18 | children: &[&Self::Prop], 19 | ) -> Self::Prop { 20 | match typ { 21 | OptRelNodeTyp::Scan => { 22 | let name = data.unwrap().as_str().to_string(); 23 | self.catalog.get(&name) 24 | } 25 | // ... 26 | ``` 27 | 28 | The schema property builder implements the `derive` function, which takes the plan node type, plan node data, and the children properties, in order to infer the property of the current plan node. The schema property is stored as a vector of data types in `Schema` structure. In optd, property will be type-erased and stored as `Box` along with each `RelNode` group in the memo table. 
On the developer side, it does not need to handle all the type-erasing things and will work with typed APIs. 29 | 30 | ## Use a Property 31 | 32 | When initializing an optimizer instance, developers will need to provide a vector of property builders to be computed. The property can then be retrieved using the index in the vector and the property builder type. For example, some optimizer rules will need to know the number of columns of a plan node before rewriting an expression. 33 | 34 | For example, the current Datafusion optd optimizer is initialized with: 35 | 36 | ```rust 37 | CascadesOptimizer::new_with_prop( 38 | rules, 39 | Box::new(cost_model), 40 | vec![Box::new(SchemaPropertyBuilder::new(catalog))], 41 | // .. 42 | ), 43 | ``` 44 | 45 | Therefore, developers can use index 0 and `SchemaPropertyBuilder` to retrieve the schema of a plan node after adding the node into the optimizer memo table. 46 | 47 | ```rust 48 | impl PlanNode { 49 | pub fn schema(&self, optimizer: CascadesOptimizer) -> Schema { 50 | let group_id = optimizer.resolve_group_id(self.0.clone()); 51 | optimizer.get_property_by_group::(group_id, 0 /* property ID */) 52 | } 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /docs/src/reoptimization.md: -------------------------------------------------------------------------------- 1 | # Re-optimization 2 | 3 | optd implements re-optimization inspired by [How I Learned to Stop Worrying and Love Re-optimization](https://arxiv.org/abs/1902.08291). optd generates a plan, injects executors to collect runtime data, and uses the runtime information from the previous run to guide the optimization process. 4 | 5 | optd persists optimizer states from run to run. The states include: the memo table, whether a rule is applied on an expression, explored groups, etc. By persisting the states, optd can easily match a query plan or a subset of the query plan with plans that have been executed. 
Once these plan matches are discovered, the adaptive cost model can use the runtime data in the cost computation process to make the cost model more robust and accurate. 6 | 7 | ## Cost Model 8 | 9 | In the optd Datafusion representation, we have 2 cost models: the base cost model and the adaptive cost model. The base cost model estimates the compute and I/O cost solely based on number of rows. The adaptive cost model maintains a hash map that maps plan node group ID to runtime information from the previous N runs, and uses these runtime information to compute a more accurate row count. The adaptive cost model will use the accurate row count information to call into the base cost model that computes a more accurate compute and I/O cost. 10 | 11 | ![re-optimization architecture](./optd-cascades/optd-reopt-architecture.svg) 12 | 13 | ## Execution 14 | 15 | optd will inject collector executors into the query plan. We extended Datafusion to have a new executor called physical collector. The executor will count the number of rows passed from the child executor to the parent executor, and then store the information into the runtime data storage. 16 | 17 | ![re-optimization execution plan](./optd-cascades/optd-reopt-plan.svg) 18 | 19 | ## Optimization Phases 20 | 21 | To enable re-optimization, the user should not clear the internal state of the optimizer. This can be achieved by calling `step_clear_winner` and then `step_optimize_rel`. 22 | -------------------------------------------------------------------------------- /docs/src/sqlplannertest.md: -------------------------------------------------------------------------------- 1 | # SQLPlannerTest 2 | 3 | optd uses risinglightdb's SQL planner test library to ensure the optimizer works correctly and stably produces an expected plan. SQL planner test is a regression test. Developers provide the test framework a yaml file with the queries to be optimized and the information they want to collect. 
The test framework generates the test result and store them in SQL files. When a developer submits a pull request, the reviewers should check if any of these outputs are changed unexpectedly. 4 | 5 | The test cases can be found in `optd-sqlplannertest/tests`. Currently, we check if optd can enumerate all join orders by using the `explain:logical_join_orders,physical_plan` task and check if the query output is as expected by using the `execute` task. 6 | -------------------------------------------------------------------------------- /optd-adaptive-demo/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-adaptive-demo" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | datafusion-optd-cli = { path = "../datafusion-optd-cli" } 10 | rand = "0.8" 11 | datafusion = { version = "43.0.0", features = [ 12 | "avro", 13 | "crypto_expressions", 14 | "encoding_expressions", 15 | "regex_expressions", 16 | "unicode_expressions", 17 | "compression", 18 | ] } 19 | mimalloc = { version = "0.1", default-features = false } 20 | tokio = { version = "1.24", features = [ 21 | "macros", 22 | "rt", 23 | "rt-multi-thread", 24 | "sync", 25 | "parking_lot", 26 | ] } 27 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge" } 28 | optd-datafusion-repr = { path = "../optd-datafusion-repr" } 29 | console = "0.15" 30 | -------------------------------------------------------------------------------- /optd-core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-core" 3 | description = "core library for optd" 4 | version = { workspace = true } 5 | edition = { workspace = true } 6 | homepage = { workspace = true } 7 | keywords = { workspace = true } 8 | license = { workspace = true } 9 | repository = { workspace = true } 10 | 
11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | anyhow = "1" 15 | tracing = "0.1" 16 | ordered-float = "4" 17 | itertools = "0.13" 18 | serde = { version = "1.0", features = ["derive", "rc"] } 19 | arrow-schema = "53.3.0" 20 | chrono = "0.4" 21 | erased-serde = "0.4" 22 | pollster = "0.4" 23 | stacker = "0.1" 24 | 25 | [dev-dependencies] 26 | pretty_assertions = "1.4.1" 27 | -------------------------------------------------------------------------------- /optd-core/src/cascades.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | //! The core cascades optimizer implementation. 7 | 8 | mod memo; 9 | mod optimizer; 10 | pub mod rule_match; 11 | mod tasks2; 12 | 13 | pub use memo::{Memo, NaiveMemo}; 14 | pub use optimizer::{CascadesOptimizer, ExprId, GroupId, OptimizerProperties, RelNodeContext}; 15 | -------------------------------------------------------------------------------- /optd-core/src/cost.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use crate::cascades::{CascadesOptimizer, Memo, RelNodeContext}; 7 | use crate::nodes::{ArcPredNode, NodeType}; 8 | 9 | /// The statistics of a group. 10 | pub struct Statistics(pub Box); 11 | 12 | /// The cost of an operation. The cost is represented as a vector of double values. 13 | /// For example, it can be represented as `[compute_cost, io_cost]`. 14 | /// A lower value means a better cost. 
15 | #[derive(Default, Clone, Debug, PartialOrd, PartialEq)] 16 | pub struct Cost(pub Vec); 17 | 18 | pub trait CostModel>: 'static + Send + Sync { 19 | /// Compute the cost of a single operation. `RelNodeContext` might be 20 | /// optional in the future when we implement physical property enforcers. 21 | /// If we have not decided the winner for a child group yet, the statistics 22 | /// for that group will be `None`. 23 | #[allow(clippy::too_many_arguments)] 24 | fn compute_operation_cost( 25 | &self, 26 | node: &T, 27 | predicates: &[ArcPredNode], 28 | children_stats: &[Option<&Statistics>], 29 | context: RelNodeContext, 30 | optimizer: &CascadesOptimizer, 31 | ) -> Cost; 32 | 33 | /// Derive the statistics of a single operation. `RelNodeContext` might be 34 | /// optional in the future when we implement physical property enforcers. 35 | fn derive_statistics( 36 | &self, 37 | node: &T, 38 | predicates: &[ArcPredNode], 39 | children_stats: &[&Statistics], 40 | context: RelNodeContext, 41 | optimizer: &CascadesOptimizer, 42 | ) -> Statistics; 43 | 44 | fn explain_cost(&self, cost: &Cost) -> String; 45 | 46 | fn explain_statistics(&self, cost: &Statistics) -> String; 47 | 48 | fn accumulate(&self, total_cost: &mut Cost, cost: &Cost); 49 | 50 | fn sum(&self, operation_cost: &Cost, inputs_cost: &[Cost]) -> Cost { 51 | let mut total_cost = operation_cost.clone(); 52 | for input in inputs_cost { 53 | self.accumulate(&mut total_cost, input); 54 | } 55 | total_cost 56 | } 57 | 58 | /// The zero cost. 59 | fn zero(&self) -> Cost; 60 | 61 | /// The weighted cost of a compound cost. 
62 | fn weighted_cost(&self, cost: &Cost) -> f64; 63 | } 64 | -------------------------------------------------------------------------------- /optd-core/src/heuristics.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | mod optimizer; 7 | 8 | pub use optimizer::{ApplyOrder, HeuristicsOptimizer, HeuristicsOptimizerOptions}; 9 | -------------------------------------------------------------------------------- /optd-core/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | #![allow(clippy::new_without_default)] 7 | 8 | pub mod cascades; 9 | pub mod cost; 10 | pub mod heuristics; 11 | pub mod logical_property; 12 | pub mod nodes; 13 | pub mod optimizer; 14 | pub mod physical_property; 15 | pub mod rules; 16 | 17 | #[cfg(test)] 18 | pub(crate) mod tests; 19 | -------------------------------------------------------------------------------- /optd-core/src/logical_property.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use std::any::Any; 7 | use std::fmt::{Debug, Display}; 8 | 9 | use crate::nodes::{ArcPredNode, NodeType}; 10 | 11 | /// The trait enables we store any logical property in the memo table by erasing the concrete type. 
12 | /// In the future, we can implement `serialize`/`deserialize` on this trait so that we can serialize 13 | /// the logical properties. 14 | pub trait LogicalProperty: 'static + Any + Send + Sync + Debug + Display { 15 | fn as_any(&self) -> &dyn Any; 16 | } 17 | 18 | /// A wrapper around the `LogicalPropertyBuilder` so that we can erase the concrete type and store 19 | /// it safely in the memo table. 20 | pub trait LogicalPropertyBuilderAny: 'static + Send + Sync { 21 | fn derive_any( 22 | &self, 23 | typ: T, 24 | predicates: &[ArcPredNode], 25 | children: &[&dyn LogicalProperty], 26 | ) -> Box; 27 | fn property_name(&self) -> &'static str; 28 | } 29 | 30 | /// The trait for building logical properties for a plan node. 31 | pub trait LogicalPropertyBuilder: 'static + Send + Sync + Sized { 32 | type Prop: LogicalProperty + Sized + Clone; 33 | 34 | /// Derive the output logical property based on the input logical properties and the current plan node information. 35 | fn derive(&self, typ: T, predicates: &[ArcPredNode], children: &[&Self::Prop]) 36 | -> Self::Prop; 37 | 38 | fn property_name(&self) -> &'static str; 39 | } 40 | 41 | impl> LogicalPropertyBuilderAny for P { 42 | fn derive_any( 43 | &self, 44 | typ: T, 45 | predicates: &[ArcPredNode], 46 | children: &[&dyn LogicalProperty], 47 | ) -> Box { 48 | let children: Vec<&P::Prop> = children 49 | .iter() 50 | .map(|child| { 51 | child 52 | .as_any() 53 | .downcast_ref::() 54 | .expect("Failed to downcast child") 55 | }) 56 | .collect(); 57 | Box::new(self.derive(typ, predicates, &children)) 58 | } 59 | 60 | fn property_name(&self) -> &'static str { 61 | LogicalPropertyBuilder::property_name(self) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /optd-core/src/optimizer.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an 
MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use anyhow::Result; 7 | 8 | use crate::logical_property::LogicalPropertyBuilder; 9 | use crate::nodes::{ArcPlanNode, NodeType, PlanNodeOrGroup}; 10 | use crate::physical_property::PhysicalProperty; 11 | 12 | pub trait Optimizer { 13 | fn optimize(&mut self, root_rel: ArcPlanNode) -> Result>; 14 | 15 | fn optimize_with_required_props( 16 | &mut self, 17 | root_rel: ArcPlanNode, 18 | required_props: &[&dyn PhysicalProperty], 19 | ) -> Result>; 20 | 21 | fn get_logical_property>( 22 | &self, 23 | root_rel: PlanNodeOrGroup, 24 | idx: usize, 25 | ) -> P::Prop; 26 | } 27 | -------------------------------------------------------------------------------- /optd-core/src/rules.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | mod ir; 7 | 8 | pub use ir::RuleMatcher; 9 | 10 | use crate::nodes::{ArcPlanNode, NodeType, PlanNodeOrGroup}; 11 | use crate::optimizer::Optimizer; 12 | 13 | // TODO: docs, possible renames. 14 | // TODO: Why do we have all of these match types? Seems like possible overkill. 
15 | pub trait Rule>: 'static + Send + Sync { 16 | fn matcher(&self) -> &RuleMatcher; 17 | fn apply(&self, optimizer: &O, binding: ArcPlanNode) -> Vec>; 18 | fn name(&self) -> &'static str; 19 | fn is_impl_rule(&self) -> bool { 20 | false 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /optd-core/src/rules/ir.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use crate::nodes::NodeType; 7 | 8 | pub enum RuleMatcher { 9 | /// Match a node of type `typ`. 10 | MatchNode { typ: T, children: Vec }, 11 | /// Match "discriminant" (Only check for variant matches---don't consider 12 | /// inner data). 13 | /// This may be useful when, for example, one has an enum variant such as 14 | /// ConstantExpr(ConstantType), and one wants to match on all ConstantExpr 15 | /// regardless of the inner ConstantType. 16 | MatchDiscriminant { 17 | typ_discriminant: std::mem::Discriminant, 18 | children: Vec, 19 | }, 20 | /// Match any plan node. 21 | Any, 22 | /// Match all plan node. 23 | AnyMany, 24 | } 25 | -------------------------------------------------------------------------------- /optd-core/src/tests.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | pub(crate) mod common; 7 | pub(crate) mod heuristics_physical_property; 8 | -------------------------------------------------------------------------------- /optd-datafusion-bridge/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-datafusion-bridge" 3 | description = "datafusion bridge for optd" 4 | version = { workspace = true } 5 | edition = { workspace = true } 6 | homepage = { workspace = true } 7 | keywords = { workspace = true } 8 | license = { workspace = true } 9 | repository = { workspace = true } 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | datafusion = "43.0.0" 15 | datafusion-expr = "43.0.0" 16 | async-trait = "0.1" 17 | itertools = "0.13" 18 | optd-core = { path = "../optd-core", version = "0.1" } 19 | optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } 20 | optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" } 21 | anyhow = "1" 22 | async-recursion = "1" 23 | futures-lite = "2" 24 | futures-util = "0.3" 25 | tracing = "0.1" 26 | -------------------------------------------------------------------------------- /optd-datafusion-repr-adv-cost/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-datafusion-repr-adv-cost" 3 | description = "datafusion plan representation for optd" 4 | version = { workspace = true } 5 | edition = { workspace = true } 6 | homepage = { workspace = true } 7 | keywords = { workspace = true } 8 | license = { workspace = true } 9 | repository = { workspace = true } 10 | 11 | [dependencies] 12 | anyhow = "1" 13 | arrow-schema = "53.3.0" 14 | assert_approx_eq = "1.1.0" 15 | datafusion = "43.0.0" 16 | ordered-float = "4" 17 | optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } 18 | optd-core = { path = 
"../optd-core", version = "0.1" } 19 | serde = { version = "1.0", features = ["derive"] } 20 | rayon = "1.10" 21 | itertools = "0.13" 22 | test-case = "3.3" 23 | tracing = "0.1" 24 | optd-gungnir = { path = "../optd-gungnir", version = "0.1" } 25 | serde_with = { version = "3.7.0", features = ["json"] } 26 | -------------------------------------------------------------------------------- /optd-datafusion-repr-adv-cost/src/adv_stats/agg.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use optd_datafusion_repr::plan_nodes::{ArcDfPredNode, DfReprPredNode, ListPred}; 7 | use optd_datafusion_repr::properties::column_ref::{ 8 | BaseTableColumnRef, ColumnRef, GroupColumnRefs, 9 | }; 10 | use serde::de::DeserializeOwned; 11 | use serde::Serialize; 12 | 13 | use super::AdvStats; 14 | use crate::adv_stats::stats::{Distribution, MostCommonValues}; 15 | use crate::adv_stats::DEFAULT_NUM_DISTINCT; 16 | 17 | impl< 18 | M: MostCommonValues + Serialize + DeserializeOwned, 19 | D: Distribution + Serialize + DeserializeOwned, 20 | > AdvStats 21 | { 22 | pub(crate) fn get_agg_row_cnt( 23 | &self, 24 | group_by: ArcDfPredNode, 25 | output_col_refs: GroupColumnRefs, 26 | ) -> f64 { 27 | let group_by = ListPred::from_pred_node(group_by).unwrap(); 28 | if group_by.is_empty() { 29 | 1.0 30 | } else { 31 | // Multiply the n-distinct of all the group by columns. 
32 | // TODO: improve with multi-dimensional n-distinct 33 | output_col_refs 34 | .base_table_column_refs() 35 | .iter() 36 | .take(group_by.len()) 37 | .map(|col_ref| match col_ref { 38 | ColumnRef::BaseTableColumnRef(BaseTableColumnRef { table, col_idx }) => { 39 | let table_stats = self.per_table_stats_map.get(table); 40 | let column_stats = table_stats.and_then(|table_stats| { 41 | table_stats.column_comb_stats.get(&vec![*col_idx]) 42 | }); 43 | 44 | if let Some(column_stats) = column_stats { 45 | column_stats.ndistinct as f64 46 | } else { 47 | // The column type is not supported or stats are missing. 48 | DEFAULT_NUM_DISTINCT as f64 49 | } 50 | } 51 | ColumnRef::Derived => DEFAULT_NUM_DISTINCT as f64, 52 | _ => panic!( 53 | "GROUP BY base table column ref must either be derived or base table" 54 | ), 55 | }) 56 | .product() 57 | } 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /optd-datafusion-repr-adv-cost/src/adv_stats/limit.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
impl<
        M: MostCommonValues + Serialize + DeserializeOwned,
        D: Distribution + Serialize + DeserializeOwned,
    > AdvStats<M, D>
{
    /// Estimates the output cardinality of a LIMIT node.
    ///
    /// `fetch_expr` must be a `ConstantPred` whose value is readable via
    /// `as_i64()`; panics (via `unwrap`) if it is not a constant. The child's
    /// estimated cardinality is capped at the fetch count.
    pub(crate) fn get_limit_row_cnt(&self, child_row_cnt: f64, fetch_expr: ArcDfPredNode) -> f64 {
        let fetch = ConstantPred::from_pred_node(fetch_expr)
            .unwrap()
            .value()
            .as_i64();
        // i64::MAX represents None (no fetch limit): pass the child's
        // cardinality through unchanged.
        // NOTE(review): the original comment said "u64::MAX represents None",
        // but the code compares against i64::MAX — confirm which sentinel the
        // producer of this predicate actually writes.
        if fetch == i64::MAX {
            child_row_cnt
        } else {
            child_row_cnt.min(fetch as f64)
        }
    }
}
| // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod adaptive_cost; 7 | pub mod base_cost; 8 | 9 | pub use adaptive_cost::{AdaptiveCostModel, RuntimeAdaptionStorage}; 10 | pub use base_cost::{DfCostModel, COMPUTE_COST, IO_COST}; 11 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/optimizer_ext.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use optd_core::nodes::PlanNodeOrGroup; 7 | use optd_core::optimizer::Optimizer; 8 | 9 | use crate::plan_nodes::DfNodeType; 10 | use crate::properties::column_ref::{ColumnRefPropertyBuilder, GroupColumnRefs}; 11 | use crate::properties::schema::{Schema, SchemaPropertyBuilder}; 12 | 13 | pub trait OptimizerExt: Optimizer { 14 | fn get_schema_of(&self, root_rel: PlanNodeOrGroup) -> Schema; 15 | fn get_column_ref_of(&self, root_rel: PlanNodeOrGroup) -> GroupColumnRefs; 16 | } 17 | 18 | impl> OptimizerExt for O { 19 | fn get_schema_of(&self, root_rel: PlanNodeOrGroup) -> Schema { 20 | self.get_logical_property::(root_rel, 0) 21 | } 22 | 23 | fn get_column_ref_of(&self, root_rel: PlanNodeOrGroup) -> GroupColumnRefs { 24 | self.get_logical_property::(root_rel, 1) 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/agg.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use super::macros::define_plan_node; 7 | use super::predicates::ListPred; 8 | use super::{ArcDfPlanNode, DfNodeType, DfPlanNode, DfReprPlanNode}; 9 | 10 | #[derive(Clone, Debug)] 11 | pub struct LogicalAgg(pub ArcDfPlanNode); 12 | 13 | define_plan_node!( 14 | LogicalAgg : DfPlanNode, 15 | Agg, [ 16 | { 0, child: ArcDfPlanNode } 17 | ], [ 18 | { 0, exprs: ListPred }, 19 | { 1, groups: ListPred } 20 | ] 21 | ); 22 | 23 | #[derive(Clone, Debug)] 24 | pub struct PhysicalAgg(pub ArcDfPlanNode); 25 | 26 | define_plan_node!( 27 | PhysicalAgg : DfPlanNode, 28 | PhysicalAgg, [ 29 | { 0, child: ArcDfPlanNode } 30 | ], [ 31 | { 0, aggrs: ListPred }, 32 | { 1, groups: ListPred } 33 | ] 34 | ); 35 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/filter.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use super::macros::define_plan_node; 7 | use super::{ArcDfPlanNode, ArcDfPredNode, DfNodeType, DfPlanNode, DfReprPlanNode}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct LogicalFilter(pub ArcDfPlanNode); 11 | 12 | define_plan_node!( 13 | LogicalFilter : DfPlanNode, 14 | Filter, [ 15 | { 0, child: ArcDfPlanNode } 16 | ], [ 17 | { 0, cond: ArcDfPredNode } 18 | ] 19 | ); 20 | 21 | #[derive(Clone, Debug)] 22 | pub struct PhysicalFilter(pub ArcDfPlanNode); 23 | 24 | define_plan_node!( 25 | PhysicalFilter : DfPlanNode, 26 | PhysicalFilter, [ 27 | { 0, child: ArcDfPlanNode } 28 | ], [ 29 | { 0, cond: ArcDfPredNode } 30 | ] 31 | ); 32 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/join.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use core::fmt; 7 | use std::fmt::Display; 8 | 9 | use super::macros::define_plan_node; 10 | use super::{ArcDfPlanNode, ArcDfPredNode, DfNodeType, DfPlanNode, DfReprPlanNode, ListPred}; 11 | 12 | #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 13 | pub enum JoinType { 14 | Inner = 1, 15 | FullOuter, 16 | LeftOuter, 17 | RightOuter, 18 | LeftSemi, 19 | RightSemi, 20 | LeftAnti, 21 | RightAnti, 22 | LeftMark, 23 | } 24 | 25 | impl Display for JoinType { 26 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 27 | write!(f, "{:?}", self) 28 | } 29 | } 30 | 31 | #[derive(Clone, Debug)] 32 | pub struct LogicalJoin(pub ArcDfPlanNode); 33 | 34 | define_plan_node!( 35 | LogicalJoin : DfPlanNode, 36 | Join, [ 37 | { 0, left: ArcDfPlanNode }, 38 | { 1, right: ArcDfPlanNode } 39 | ], [ 40 | { 0, cond: ArcDfPredNode } 41 | ], { join_type: JoinType } 42 | ); 43 | 44 | #[derive(Clone, Debug)] 45 | pub struct PhysicalNestedLoopJoin(pub ArcDfPlanNode); 46 | 47 | define_plan_node!( 48 | PhysicalNestedLoopJoin : DfPlanNode, 49 | PhysicalNestedLoopJoin, [ 50 | { 0, left: ArcDfPlanNode }, 51 | { 1, right: ArcDfPlanNode } 52 | ], [ 53 | { 0, cond: ArcDfPredNode } 54 | ], { join_type: JoinType } 55 | ); 56 | 57 | #[derive(Clone, Debug)] 58 | pub struct PhysicalHashJoin(pub ArcDfPlanNode); 59 | 60 | define_plan_node!( 61 | PhysicalHashJoin : DfPlanNode, 62 | PhysicalHashJoin, [ 63 | { 0, left: ArcDfPlanNode }, 64 | { 1, right: ArcDfPlanNode } 65 | ], [ 66 | { 0, left_keys: ListPred }, 67 | { 1, right_keys: ListPred } 68 | ], { join_type: JoinType } 69 | ); 70 | 71 | impl LogicalJoin { 72 | /// Takes in left/right schema sizes, and maps a column index to be as if it 73 | /// were pushed down to the left or right side of a join accordingly. 
74 | pub fn map_through_join( 75 | col_idx: usize, 76 | left_schema_size: usize, 77 | right_schema_size: usize, 78 | ) -> usize { 79 | assert!(col_idx < left_schema_size + right_schema_size); 80 | if col_idx < left_schema_size { 81 | col_idx 82 | } else { 83 | col_idx - left_schema_size 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/limit.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use super::macros::define_plan_node; 7 | use super::{ArcDfPlanNode, ArcDfPredNode, DfNodeType, DfPlanNode, DfReprPlanNode}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct LogicalLimit(pub ArcDfPlanNode); 11 | 12 | define_plan_node!( 13 | LogicalLimit : DfPlanNode, 14 | Limit, [ 15 | { 0, child: ArcDfPlanNode } 16 | ], [ 17 | { 0, skip: ArcDfPredNode }, 18 | { 1, fetch: ArcDfPredNode } 19 | ] 20 | ); 21 | 22 | #[derive(Clone, Debug)] 23 | pub struct PhysicalLimit(pub ArcDfPlanNode); 24 | 25 | define_plan_node!( 26 | PhysicalLimit : DfPlanNode, 27 | PhysicalLimit, [ 28 | { 0, child: ArcDfPlanNode } 29 | ], [ 30 | { 0, skip: ArcDfPredNode }, 31 | { 1, fetch: ArcDfPredNode } 32 | ] 33 | ); 34 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/between_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use optd_core::nodes::PlanNodeMetaMap; 7 | use pretty_xmlish::Pretty; 8 | 9 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 10 | 11 | #[derive(Clone, Debug)] 12 | pub struct BetweenPred(pub ArcDfPredNode); 13 | 14 | impl BetweenPred { 15 | pub fn new(child: ArcDfPredNode, lower: ArcDfPredNode, upper: ArcDfPredNode) -> Self { 16 | BetweenPred( 17 | DfPredNode { 18 | typ: DfPredType::Between, 19 | children: vec![child, lower, upper], 20 | data: None, 21 | } 22 | .into(), 23 | ) 24 | } 25 | 26 | pub fn child(&self) -> ArcDfPredNode { 27 | self.0.child(0) 28 | } 29 | 30 | pub fn lower(&self) -> ArcDfPredNode { 31 | self.0.child(1) 32 | } 33 | 34 | pub fn upper(&self) -> ArcDfPredNode { 35 | self.0.child(2) 36 | } 37 | } 38 | 39 | impl DfReprPredNode for BetweenPred { 40 | fn into_pred_node(self) -> ArcDfPredNode { 41 | self.0 42 | } 43 | 44 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 45 | if !matches!(pred_node.typ, DfPredType::Between) { 46 | return None; 47 | } 48 | Some(Self(pred_node)) 49 | } 50 | 51 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 52 | Pretty::simple_record( 53 | "Between", 54 | vec![ 55 | ("child", self.child().explain(meta_map)), 56 | ("lower", self.lower().explain(meta_map)), 57 | ("upper", self.upper().explain(meta_map)), 58 | ], 59 | vec![], 60 | ) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/cast_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use arrow_schema::DataType; 7 | use optd_core::nodes::PlanNodeMetaMap; 8 | use pretty_xmlish::Pretty; 9 | 10 | use super::data_type_pred::DataTypePred; 11 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 12 | 13 | #[derive(Clone, Debug)] 14 | pub struct CastPred(pub ArcDfPredNode); 15 | 16 | impl CastPred { 17 | pub fn new(child: ArcDfPredNode, cast_to: DataType) -> Self { 18 | CastPred( 19 | DfPredNode { 20 | typ: DfPredType::Cast, 21 | children: vec![child, DataTypePred::new(cast_to).into_pred_node()], 22 | data: None, 23 | } 24 | .into(), 25 | ) 26 | } 27 | 28 | pub fn child(&self) -> ArcDfPredNode { 29 | self.0.child(0) 30 | } 31 | 32 | pub fn cast_to(&self) -> DataType { 33 | DataTypePred::from_pred_node(self.0.child(1)) 34 | .unwrap() 35 | .data_type() 36 | } 37 | } 38 | 39 | impl DfReprPredNode for CastPred { 40 | fn into_pred_node(self) -> ArcDfPredNode { 41 | self.0 42 | } 43 | 44 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 45 | if !matches!(pred_node.typ, DfPredType::Cast) { 46 | return None; 47 | } 48 | Some(Self(pred_node)) 49 | } 50 | 51 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 52 | Pretty::simple_record( 53 | "Cast", 54 | vec![ 55 | ("cast_to", format!("{}", self.cast_to()).into()), 56 | ("child", self.child().explain(meta_map)), 57 | ], 58 | vec![], 59 | ) 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/column_ref_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use optd_core::nodes::{PlanNodeMetaMap, Value}; 7 | use pretty_xmlish::Pretty; 8 | 9 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 10 | 11 | #[derive(Clone, Debug)] 12 | pub struct ColumnRefPred(pub ArcDfPredNode); 13 | 14 | impl ColumnRefPred { 15 | /// Creates a new `ColumnRef` expression. 16 | pub fn new(column_idx: usize) -> ColumnRefPred { 17 | // this conversion is always safe since usize is at most u64 18 | let u64_column_idx = column_idx as u64; 19 | ColumnRefPred( 20 | DfPredNode { 21 | typ: DfPredType::ColumnRef, 22 | children: vec![], 23 | data: Some(Value::UInt64(u64_column_idx)), 24 | } 25 | .into(), 26 | ) 27 | } 28 | 29 | fn get_data_usize(&self) -> usize { 30 | self.0.data.as_ref().unwrap().as_u64() as usize 31 | } 32 | 33 | /// Gets the column index. 34 | pub fn index(&self) -> usize { 35 | self.get_data_usize() 36 | } 37 | } 38 | 39 | impl DfReprPredNode for ColumnRefPred { 40 | fn into_pred_node(self) -> ArcDfPredNode { 41 | self.0 42 | } 43 | 44 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 45 | if pred_node.typ != DfPredType::ColumnRef { 46 | return None; 47 | } 48 | Some(Self(pred_node)) 49 | } 50 | 51 | fn explain(&self, _meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 52 | Pretty::display(&format!("#{}", self.index())) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/data_type_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use arrow_schema::DataType; 7 | use optd_core::nodes::PlanNodeMetaMap; 8 | use pretty_xmlish::Pretty; 9 | 10 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 11 | 12 | #[derive(Clone, Debug)] 13 | pub struct DataTypePred(pub ArcDfPredNode); 14 | 15 | impl DataTypePred { 16 | pub fn new(typ: DataType) -> Self { 17 | DataTypePred( 18 | DfPredNode { 19 | typ: DfPredType::DataType(typ), 20 | children: vec![], 21 | data: None, 22 | } 23 | .into(), 24 | ) 25 | } 26 | 27 | pub fn data_type(&self) -> DataType { 28 | if let DfPredType::DataType(ref data_type) = self.0.typ { 29 | data_type.clone() 30 | } else { 31 | panic!("not a data type") 32 | } 33 | } 34 | } 35 | 36 | impl DfReprPredNode for DataTypePred { 37 | fn into_pred_node(self) -> ArcDfPredNode { 38 | self.0 39 | } 40 | 41 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 42 | if !matches!(pred_node.typ, DfPredType::DataType(_)) { 43 | return None; 44 | } 45 | Some(Self(pred_node)) 46 | } 47 | 48 | fn explain(&self, _meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 49 | Pretty::display(&self.data_type().to_string()) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/extern_column_ref_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use optd_core::nodes::{PlanNodeMetaMap, Value}; 7 | use pretty_xmlish::Pretty; 8 | 9 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 10 | 11 | #[derive(Clone, Debug)] 12 | pub struct ExternColumnRefPred(pub ArcDfPredNode); 13 | 14 | impl ExternColumnRefPred { 15 | /// Creates a new `DepExternColumnRef` expression. 
16 | pub fn new(column_idx: usize) -> ExternColumnRefPred { 17 | // this conversion is always safe since usize is at most u64 18 | let u64_column_idx = column_idx as u64; 19 | ExternColumnRefPred( 20 | DfPredNode { 21 | typ: DfPredType::ExternColumnRef, 22 | children: vec![], 23 | data: Some(Value::UInt64(u64_column_idx)), 24 | } 25 | .into(), 26 | ) 27 | } 28 | 29 | fn get_data_usize(&self) -> usize { 30 | self.0.data.as_ref().unwrap().as_u64() as usize 31 | } 32 | 33 | /// Gets the column index. 34 | pub fn index(&self) -> usize { 35 | self.get_data_usize() 36 | } 37 | } 38 | 39 | impl DfReprPredNode for ExternColumnRefPred { 40 | fn into_pred_node(self) -> ArcDfPredNode { 41 | self.0 42 | } 43 | 44 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 45 | if pred_node.typ != DfPredType::ExternColumnRef { 46 | return None; 47 | } 48 | Some(Self(pred_node)) 49 | } 50 | 51 | fn explain(&self, _meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 52 | Pretty::display(&format!("Extern(#{})", self.index())) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/in_list_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use optd_core::nodes::{PlanNodeMetaMap, Value}; 7 | use pretty_xmlish::Pretty; 8 | 9 | use super::ListPred; 10 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 11 | 12 | #[derive(Clone, Debug)] 13 | pub struct InListPred(pub ArcDfPredNode); 14 | 15 | impl InListPred { 16 | pub fn new(child: ArcDfPredNode, list: ListPred, negated: bool) -> Self { 17 | InListPred( 18 | DfPredNode { 19 | typ: DfPredType::InList, 20 | children: vec![child, list.into_pred_node()], 21 | data: Some(Value::Bool(negated)), 22 | } 23 | .into(), 24 | ) 25 | } 26 | 27 | pub fn child(&self) -> ArcDfPredNode { 28 | self.0.child(0) 29 | } 30 | 31 | pub fn list(&self) -> ListPred { 32 | ListPred::from_pred_node(self.0.child(1)).unwrap() 33 | } 34 | 35 | /// `true` for `NOT IN`. 36 | pub fn negated(&self) -> bool { 37 | self.0.data.as_ref().unwrap().as_bool() 38 | } 39 | } 40 | 41 | impl DfReprPredNode for InListPred { 42 | fn into_pred_node(self) -> ArcDfPredNode { 43 | self.0 44 | } 45 | 46 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 47 | if !matches!(pred_node.typ, DfPredType::InList) { 48 | return None; 49 | } 50 | Some(Self(pred_node)) 51 | } 52 | 53 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 54 | Pretty::simple_record( 55 | "InList", 56 | vec![ 57 | ("expr", self.child().explain(meta_map)), 58 | ("list", self.list().explain(meta_map)), 59 | ("negated", self.negated().to_string().into()), 60 | ], 61 | vec![], 62 | ) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/list_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
/// A variadic list of predicate expressions, stored as the children of a
/// single `DfPredType::List` node.
#[derive(Clone, Debug)]
pub struct ListPred(pub ArcDfPredNode);

impl ListPred {
    /// Creates a new list predicate whose elements are `preds`, in order.
    pub fn new(preds: Vec<ArcDfPredNode>) -> Self {
        ListPred(
            DfPredNode {
                typ: DfPredType::List,
                children: preds,
                data: None,
            }
            .into(),
        )
    }

    /// Gets number of expressions in the list
    pub fn len(&self) -> usize {
        self.0.children.len()
    }

    /// Returns `true` if the list contains no expressions.
    pub fn is_empty(&self) -> bool {
        self.0.children.is_empty()
    }

    /// Gets the `idx`-th expression in the list.
    pub fn child(&self, idx: usize) -> ArcDfPredNode {
        self.0.child(idx)
    }

    /// Returns a (cloned) `Vec` of all expressions in the list.
    pub fn to_vec(&self) -> Vec<ArcDfPredNode> {
        self.0.children.clone()
    }
}

impl DfReprPredNode for ListPred {
    fn into_pred_node(self) -> ArcDfPredNode {
        self.0
    }

    fn from_pred_node(pred_node: ArcDfPredNode) -> Option<Self> {
        if pred_node.typ != DfPredType::List {
            return None;
        }
        Some(Self(pred_node))
    }

    /// Renders the list as a pretty-printed array of its elements' explains.
    fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> {
        Pretty::Array(
            (0..self.len())
                .map(|x| self.child(x).explain(meta_map))
                .collect_vec(),
        )
    }
}
5 | 6 | use std::fmt::Display; 7 | 8 | use optd_core::nodes::PlanNodeMetaMap; 9 | use pretty_xmlish::Pretty; 10 | 11 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 12 | 13 | #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 14 | pub enum SortOrderType { 15 | Asc, 16 | Desc, 17 | } 18 | 19 | impl Display for SortOrderType { 20 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 21 | write!(f, "{:?}", self) 22 | } 23 | } 24 | 25 | #[derive(Clone, Debug)] 26 | pub struct SortOrderPred(pub ArcDfPredNode); 27 | 28 | impl SortOrderPred { 29 | pub fn new(order: SortOrderType, child: ArcDfPredNode) -> Self { 30 | SortOrderPred( 31 | DfPredNode { 32 | typ: DfPredType::SortOrder(order), 33 | children: vec![child], 34 | data: None, 35 | } 36 | .into(), 37 | ) 38 | } 39 | 40 | pub fn child(&self) -> ArcDfPredNode { 41 | self.0.child(0) 42 | } 43 | 44 | pub fn order(&self) -> SortOrderType { 45 | if let DfPredType::SortOrder(order) = self.0.typ { 46 | order 47 | } else { 48 | panic!("not a sort order expr") 49 | } 50 | } 51 | } 52 | 53 | impl DfReprPredNode for SortOrderPred { 54 | fn into_pred_node(self) -> ArcDfPredNode { 55 | self.0 56 | } 57 | 58 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 59 | if !matches!(pred_node.typ, DfPredType::SortOrder(_)) { 60 | return None; 61 | } 62 | Some(Self(pred_node)) 63 | } 64 | 65 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 66 | Pretty::simple_record( 67 | "SortOrder", 68 | vec![("order", self.order().to_string().into())], 69 | vec![self.child().explain(meta_map)], 70 | ) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/predicates/un_op_pred.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that 
can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use std::fmt::Display; 7 | 8 | use optd_core::nodes::PlanNodeMetaMap; 9 | use pretty_xmlish::Pretty; 10 | 11 | use crate::plan_nodes::{ArcDfPredNode, DfPredNode, DfPredType, DfReprPredNode}; 12 | 13 | #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 14 | pub enum UnOpType { 15 | Neg = 1, 16 | Not, 17 | } 18 | 19 | impl Display for UnOpType { 20 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 21 | write!(f, "{:?}", self) 22 | } 23 | } 24 | 25 | #[derive(Clone, Debug)] 26 | pub struct UnOpPred(pub ArcDfPredNode); 27 | 28 | impl UnOpPred { 29 | pub fn new(child: ArcDfPredNode, op_type: UnOpType) -> Self { 30 | UnOpPred( 31 | DfPredNode { 32 | typ: DfPredType::UnOp(op_type), 33 | children: vec![child], 34 | data: None, 35 | } 36 | .into(), 37 | ) 38 | } 39 | 40 | pub fn child(&self) -> ArcDfPredNode { 41 | self.0.child(0) 42 | } 43 | 44 | pub fn op_type(&self) -> UnOpType { 45 | if let DfPredType::UnOp(op_type) = self.0.typ { 46 | op_type 47 | } else { 48 | panic!("not a un op") 49 | } 50 | } 51 | } 52 | 53 | impl DfReprPredNode for UnOpPred { 54 | fn into_pred_node(self) -> ArcDfPredNode { 55 | self.0 56 | } 57 | 58 | fn from_pred_node(pred_node: ArcDfPredNode) -> Option { 59 | if !matches!(pred_node.typ, DfPredType::UnOp(_)) { 60 | return None; 61 | } 62 | Some(Self(pred_node)) 63 | } 64 | 65 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 66 | Pretty::simple_record( 67 | self.op_type().to_string(), 68 | vec![], 69 | vec![self.child().explain(meta_map)], 70 | ) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/projection.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be 
found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use super::macros::define_plan_node; 7 | use super::{ArcDfPlanNode, DfNodeType, DfPlanNode, DfReprPlanNode, ListPred}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct LogicalProjection(pub ArcDfPlanNode); 11 | 12 | define_plan_node!( 13 | LogicalProjection : DfPlanNode, 14 | Projection, [ 15 | { 0, child: ArcDfPlanNode } 16 | ], [ 17 | { 0, exprs: ListPred } 18 | ] 19 | ); 20 | 21 | #[derive(Clone, Debug)] 22 | pub struct PhysicalProjection(pub ArcDfPlanNode); 23 | 24 | define_plan_node!( 25 | PhysicalProjection : DfPlanNode, 26 | PhysicalProjection, [ 27 | { 0, child: ArcDfPlanNode } 28 | ], [ 29 | { 0, exprs: ListPred } 30 | ] 31 | ); 32 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/scan.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use std::sync::Arc; 7 | 8 | use optd_core::nodes::PlanNodeMetaMap; 9 | use pretty_xmlish::Pretty; 10 | 11 | use super::{ArcDfPlanNode, ConstantPred, DfNodeType, DfPlanNode, DfReprPlanNode, DfReprPredNode}; 12 | use crate::explain::Insertable; 13 | 14 | #[derive(Clone, Debug)] 15 | pub struct LogicalScan(pub ArcDfPlanNode); 16 | 17 | impl DfReprPlanNode for LogicalScan { 18 | fn into_plan_node(self) -> ArcDfPlanNode { 19 | self.0 20 | } 21 | 22 | fn from_plan_node(plan_node: ArcDfPlanNode) -> Option { 23 | if plan_node.typ != DfNodeType::Scan { 24 | return None; 25 | } 26 | Some(Self(plan_node)) 27 | } 28 | 29 | fn explain(&self, _meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 30 | Pretty::childless_record( 31 | "LogicalScan", 32 | vec![("table", self.table().to_string().into())], 33 | ) 34 | } 35 | } 36 | 37 | impl LogicalScan { 38 | pub fn new(table: String) -> LogicalScan { 39 | LogicalScan( 40 | DfPlanNode { 41 | typ: DfNodeType::Scan, 42 | children: vec![], 43 | predicates: vec![ConstantPred::string(table).into_pred_node()], 44 | } 45 | .into(), 46 | ) 47 | } 48 | 49 | pub fn table(&self) -> Arc { 50 | ConstantPred::from_pred_node(self.0.predicates.first().unwrap().clone()) 51 | .unwrap() 52 | .value() 53 | .as_str() 54 | } 55 | } 56 | 57 | #[derive(Clone, Debug)] 58 | pub struct PhysicalScan(pub ArcDfPlanNode); 59 | 60 | impl DfReprPlanNode for PhysicalScan { 61 | fn into_plan_node(self) -> ArcDfPlanNode { 62 | self.0 63 | } 64 | 65 | fn from_plan_node(plan_node: ArcDfPlanNode) -> Option { 66 | if plan_node.typ != DfNodeType::PhysicalScan { 67 | return None; 68 | } 69 | Some(Self(plan_node)) 70 | } 71 | 72 | fn explain(&self, meta_map: Option<&PlanNodeMetaMap>) -> Pretty<'static> { 73 | let mut fields = vec![("table", self.table().to_string().into())]; 74 | if let Some(meta_map) = meta_map { 75 | fields = fields.with_meta(self.0.get_meta(meta_map)); 76 | } 77 | Pretty::childless_record("PhysicalScan", fields) 78 | } 79 | } 80 | 81 | impl 
PhysicalScan { 82 | pub fn table(&self) -> Arc { 83 | ConstantPred::from_pred_node(self.0.predicates.first().unwrap().clone()) 84 | .unwrap() 85 | .value() 86 | .as_str() 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/sort.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use super::macros::define_plan_node; 7 | use super::{ArcDfPlanNode, DfNodeType, DfPlanNode, DfReprPlanNode, ListPred}; 8 | 9 | #[derive(Clone, Debug)] 10 | pub struct LogicalSort(pub ArcDfPlanNode); 11 | 12 | // each expression in ExprList is represented as a SortOrderExpr 13 | // 1. nulls_first is not included from DF 14 | // 2. node type defines sort order per expression 15 | // 3. actual expr is stored as a child of this node 16 | define_plan_node!( 17 | LogicalSort : DfPlanNode, 18 | Sort, [ 19 | { 0, child: ArcDfPlanNode } 20 | ], [ 21 | { 0, exprs: ListPred } 22 | ] 23 | ); 24 | 25 | #[derive(Clone, Debug)] 26 | pub struct PhysicalSort(pub ArcDfPlanNode); 27 | 28 | define_plan_node!( 29 | PhysicalSort : DfPlanNode, 30 | PhysicalSort, [ 31 | { 0, child: ArcDfPlanNode } 32 | ], [ 33 | { 0, exprs: ListPred } 34 | ] 35 | ); 36 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/plan_nodes/subquery.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
use core::fmt;
use std::fmt::Display;

use super::macros::define_plan_node;
use super::{
    ArcDfPlanNode, ArcDfPredNode, BinOpType, DfNodeType, DfPlanNode, DfPredNode, DfReprPlanNode,
    ListPred,
};

/// These are the only three fundamental types of subqueries.
/// Refer to the Unnesting Arbitrary Queries talk by Mark Raasveldt for
/// info on how to translate other subquery types to these three.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum SubqueryType {
    // Subquery producing a single scalar value.
    Scalar,
    // EXISTS (...) subquery.
    Exists,
    // ANY/IN-style subquery: `pred op ANY (...)`.
    Any { pred: DfPredNode, op: BinOpType },
}

impl Display for SubqueryType {
    // Renders via the Debug representation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self)
    }
}

/// Dependent join as initially produced from a subquery, before unnesting;
/// carries the subquery kind (`sq_type`) alongside cond and extern columns.
#[derive(Clone, Debug)]
pub struct RawDependentJoin(pub ArcDfPlanNode);

define_plan_node!(
    RawDependentJoin : DfReprPlanNode,
    RawDepJoin, [
        { 0, left: ArcDfPlanNode },
        { 1, right: ArcDfPlanNode }
    ], [
        { 0, cond: ArcDfPredNode },
        { 1, extern_cols: ListPred }
    ], { sq_type: SubqueryType }
);

/// Dependent join after the initial rewrite; same shape minus `sq_type`.
#[derive(Clone, Debug)]
pub struct DependentJoin(pub ArcDfPlanNode);

define_plan_node!(
    DependentJoin : DfReprPlanNode,
    DepJoin, [
        { 0, left: ArcDfPlanNode },
        { 1, right: ArcDfPlanNode }
    ], [
        { 0, cond: ArcDfPredNode },
        { 1, extern_cols: ListPred }
    ]
);
-------------------------------------------------------------------------------- /optd-datafusion-repr/src/properties.rs: --------------------------------------------------------------------------------
// Copyright (c) 2023-2024 CMU Database Group
//
// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.
pub mod column_ref;
pub mod schema;

// Fallback name for otherwise-unnamed columns/fields — presumably used by the
// property modules above; TODO confirm usage sites.
const DEFAULT_NAME: &str = "unnamed";
-------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules.rs: --------------------------------------------------------------------------------
// Copyright (c) 2023-2024 CMU Database Group
//
// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.

mod eliminate_duplicated_expr;
mod eliminate_limit;
mod filter;
mod filter_pushdown;
mod joins;
mod macros;
mod physical;
mod project_transpose;
mod subquery;

// Re-export the rule types; `physical` and `subquery` export a curated subset
// rather than a glob.
pub use eliminate_duplicated_expr::*;
pub use eliminate_limit::*;
pub use filter::*;
pub use filter_pushdown::*;
pub use joins::*;
pub use physical::PhysicalConversionRule;
pub use project_transpose::*;
pub use subquery::{
    DepInitialDistinct, DepJoinEliminate, DepJoinPastAgg, DepJoinPastFilter, DepJoinPastProj,
};
-------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules/eliminate_limit.rs: --------------------------------------------------------------------------------
// Copyright (c) 2023-2024 CMU Database Group
//
// Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at
// https://opensource.org/licenses/MIT.
5 | 6 | use optd_core::nodes::PlanNodeOrGroup; 7 | use optd_core::optimizer::Optimizer; 8 | use optd_core::rules::{Rule, RuleMatcher}; 9 | 10 | use super::macros::define_rule; 11 | use crate::plan_nodes::{ 12 | ArcDfPlanNode, ConstantPred, ConstantType, DfNodeType, DfPredType, DfReprPlanNode, 13 | DfReprPredNode, LogicalEmptyRelation, LogicalLimit, 14 | }; 15 | use crate::OptimizerExt; 16 | 17 | define_rule!(EliminateLimitRule, apply_eliminate_limit, (Limit, child)); 18 | 19 | /// Transformations: 20 | /// - Limit with skip 0 and no fetch -> Eliminate from the tree 21 | /// - Limit with limit 0 -> EmptyRelation 22 | fn apply_eliminate_limit( 23 | optimizer: &impl Optimizer, 24 | binding: ArcDfPlanNode, 25 | ) -> Vec> { 26 | let limit = LogicalLimit::from_plan_node(binding).unwrap(); 27 | let skip = limit.skip(); 28 | let fetch = limit.fetch(); 29 | let child = limit.child(); 30 | if let DfPredType::Constant(ConstantType::Int64) = skip.typ { 31 | if let DfPredType::Constant(ConstantType::Int64) = fetch.typ { 32 | let skip_val = ConstantPred::from_pred_node(skip).unwrap().value().as_i64(); 33 | 34 | let fetch_val = ConstantPred::from_pred_node(fetch) 35 | .unwrap() 36 | .value() 37 | .as_i64(); 38 | 39 | // Bad convention to have u64 max represent None 40 | let fetch_is_none = fetch_val == i64::MAX; 41 | 42 | let schema = optimizer.get_schema_of(child.clone()); 43 | if fetch_is_none && skip_val == 0 { 44 | return vec![child]; 45 | } else if fetch_val == 0 { 46 | let node = LogicalEmptyRelation::new(false, schema); 47 | return vec![node.into_plan_node().into()]; 48 | } 49 | } 50 | } 51 | vec![] 52 | } 53 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules/project_transpose.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the 
LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod project_filter_transpose; 7 | pub mod project_join_transpose; 8 | pub mod project_merge; 9 | pub mod project_transpose_common; 10 | 11 | pub use project_filter_transpose::*; 12 | pub use project_join_transpose::*; 13 | pub use project_merge::*; 14 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules/project_transpose/project_join_transpose.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use std::vec; 7 | 8 | use optd_core::nodes::PlanNodeOrGroup; 9 | use optd_core::optimizer::Optimizer; 10 | use optd_core::rules::RuleMatcher; 11 | 12 | use super::project_transpose_common::ProjectionMapping; 13 | use crate::plan_nodes::{ 14 | ArcDfPlanNode, ColumnRefPred, DfNodeType, DfReprPlanNode, DfReprPredNode, JoinType, ListPred, 15 | LogicalJoin, LogicalProjection, 16 | }; 17 | use crate::rules::macros::define_rule; 18 | use crate::{OptimizerExt, Rule}; 19 | 20 | // (Proj A) join B -> (Proj (A join B)) 21 | define_rule!( 22 | ProjectionPullUpJoin, 23 | apply_projection_pull_up_join, 24 | (Join(JoinType::Inner), (Projection, left), right) 25 | ); 26 | 27 | fn apply_projection_pull_up_join( 28 | optimizer: &impl Optimizer, 29 | binding: ArcDfPlanNode, 30 | ) -> Vec> { 31 | let join = LogicalJoin::from_plan_node(binding).unwrap(); 32 | let projection = LogicalProjection::from_plan_node(join.left().unwrap_plan_node()).unwrap(); 33 | let left = projection.child(); 34 | let right = join.right(); 35 | let list = projection.exprs(); 36 | let cond = join.cond(); 37 | 38 | let projection = LogicalProjection::new_unchecked(left.clone(), list.clone()); 39 | 40 | let Some(mapping) = 
ProjectionMapping::build(&projection.exprs()) else { 41 | return vec![]; 42 | }; 43 | 44 | // TODO(chi): support capture projection node. 45 | let left_schema = optimizer.get_schema_of(left.clone()); 46 | let right_schema = optimizer.get_schema_of(right.clone()); 47 | let mut new_projection_exprs = list.to_vec(); 48 | for i in 0..right_schema.len() { 49 | let col = ColumnRefPred::new(i + left_schema.len()).into_pred_node(); 50 | new_projection_exprs.push(col); 51 | } 52 | let node = LogicalProjection::new( 53 | LogicalJoin::new_unchecked( 54 | left, 55 | right, 56 | mapping.rewrite_join_cond(cond, left_schema.len()), 57 | JoinType::Inner, 58 | ) 59 | .into_plan_node(), 60 | ListPred::new(new_projection_exprs), 61 | ); 62 | vec![node.into_plan_node().into()] 63 | } 64 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/rules/subquery.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod depjoin_pushdown; 7 | 8 | pub use depjoin_pushdown::{ 9 | DepInitialDistinct, DepJoinEliminate, DepJoinPastAgg, DepJoinPastFilter, DepJoinPastProj, 10 | }; 11 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/testing.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | mod dummy_cost; 7 | mod tpch_catalog; 8 | 9 | use std::sync::Arc; 10 | 11 | use optd_core::heuristics::{ApplyOrder, HeuristicsOptimizer, HeuristicsOptimizerOptions}; 12 | use optd_core::rules::Rule; 13 | 14 | use self::tpch_catalog::TpchCatalog; 15 | use crate::plan_nodes::DfNodeType; 16 | use crate::properties::schema::SchemaPropertyBuilder; 17 | 18 | /// Create a "dummy" optimizer preloaded with the TPC-H catalog for testing 19 | /// Note: Only provides the schema property currently 20 | pub fn new_test_optimizer( 21 | rule: Arc>>, 22 | ) -> HeuristicsOptimizer { 23 | let dummy_catalog = Arc::new(TpchCatalog); 24 | 25 | HeuristicsOptimizer::new_with_rules( 26 | vec![rule], 27 | HeuristicsOptimizerOptions { 28 | apply_order: ApplyOrder::TopDown, 29 | enable_physical_prop_passthrough: true, 30 | }, 31 | Arc::new([Box::new(SchemaPropertyBuilder::new(dummy_catalog))]), 32 | Arc::new([]), 33 | ) 34 | } 35 | -------------------------------------------------------------------------------- /optd-datafusion-repr/src/testing/dummy_cost.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | use optd_core::cascades::{CascadesOptimizer, NaiveMemo, RelNodeContext}; 7 | use optd_core::cost::{Cost, CostModel, Statistics}; 8 | 9 | use crate::plan_nodes::{ArcDfPredNode, DfNodeType}; 10 | 11 | /// Dummy cost model that returns a 0 cost in all cases. 12 | /// Intended for testing with the cascades optimizer. 
pub struct DummyCostModel;

// NOTE(review): generic parameters (`CostModel<DfNodeType, NaiveMemo<DfNodeType>>`,
// `CascadesOptimizer<DfNodeType>`) restored — stripped by text extraction;
// verify against upstream.
impl CostModel<DfNodeType, NaiveMemo<DfNodeType>> for DummyCostModel {
    /// Compute the cost of a single operation
    // Note: every operation costs 1.0 (not literally zero, despite the
    // file-header comment) so accumulated plan costs still order by node count.
    fn compute_operation_cost(
        &self,
        _: &DfNodeType,
        _: &[ArcDfPredNode],
        _: &[Option<&Statistics>],
        _: RelNodeContext,
        _: &CascadesOptimizer<DfNodeType>,
    ) -> Cost {
        Cost(vec![1.0])
    }

    /// Derive the statistics of a single operation
    fn derive_statistics(
        &self,
        _: &DfNodeType,
        _: &[ArcDfPredNode],
        _: &[&Statistics],
        _: RelNodeContext,
        _: &CascadesOptimizer<DfNodeType>,
    ) -> Statistics {
        // No real statistics: an opaque unit value.
        Statistics(Box::new(()))
    }

    fn explain_cost(&self, _: &Cost) -> String {
        "dummy_cost".to_string()
    }

    fn explain_statistics(&self, _: &Statistics) -> String {
        "dummy_statistics".to_string()
    }

    fn weighted_cost(&self, cost: &Cost) -> f64 {
        // Single-dimension cost vector.
        cost.0[0]
    }

    fn accumulate(&self, total_cost: &mut Cost, cost: &Cost) {
        total_cost.0[0] += cost.0[0];
    }

    fn zero(&self) -> Cost {
        Cost(vec![0.0])
    }
}
-------------------------------------------------------------------------------- /optd-gungnir/Cargo.toml: --------------------------------------------------------------------------------
[package]
name = "optd-gungnir"
description = "statistics for optd"
version = { workspace = true }
edition = { workspace = true }
homepage = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
repository = { workspace = true }

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
itertools = "0.13"
rand = "0.8"
crossbeam = "0.8"
lazy_static = "1.4"
serde = { version = "1.0", features = ["derive"] }
serde_with = { version = "3.7.0", features = ["json"] }
ordered-float = "4"
optd-core = { path = "../optd-core", version = "0.1.0"
} 22 | hashbrown = { version = "0.14", features = ["serde"] } 23 | murmur2 = "0.1" 24 | -------------------------------------------------------------------------------- /optd-gungnir/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | #![allow(clippy::new_without_default)] 7 | 8 | pub mod stats; 9 | pub mod utils; 10 | -------------------------------------------------------------------------------- /optd-gungnir/src/stats.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod counter; 7 | pub mod hyperloglog; 8 | pub mod misragries; 9 | pub mod murmur2; 10 | pub mod tdigest; 11 | -------------------------------------------------------------------------------- /optd-gungnir/src/stats/murmur2.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | //! Implementation of the MurmurHash2 function, for 64b outputs, by Austin Appleby (2008). 7 | //! Note: Assumes little-endian machines. 8 | 9 | /// Returns the MurmurHash2 (u64) given a stream of bytes and a seed. 10 | pub fn murmur_hash(bytes: &[u8], seed: u64) -> u64 { 11 | murmur2::murmur64a(bytes, seed) 12 | } 13 | 14 | // Start of unit testing section. 
15 | #[cfg(test)] 16 | mod tests { 17 | use super::murmur_hash; 18 | #[test] 19 | fn murmur_string() { 20 | assert_eq!( 21 | murmur_hash("Hyper🪵🪵 Rules!".as_bytes(), 1257851387), 22 | 1623602735526180105 23 | ); 24 | assert_eq!( 25 | murmur_hash( 26 | "All work and no play makes Jack a dull boy".as_bytes(), 27 | 1111111111 28 | ), 29 | 1955247671966919985 30 | ); 31 | assert_eq!(murmur_hash("".as_bytes(), 0), 0); 32 | assert_eq!( 33 | murmur_hash("Gungnir™".as_bytes(), 4242424242), 34 | 13329505761566523763 35 | ); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /optd-gungnir/src/utils.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod arith_encoder; 7 | -------------------------------------------------------------------------------- /optd-perfbench/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-perfbench" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | datafusion = { version = "43.0.0", features = [ 10 | "avro", 11 | "crypto_expressions", 12 | "encoding_expressions", 13 | "regex_expressions", 14 | "unicode_expressions", 15 | "compression", 16 | "serde", 17 | ] } 18 | optd-datafusion-repr = { path = "../optd-datafusion-repr" } 19 | optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost" } 20 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge" } 21 | datafusion-optd-cli = { path = "../datafusion-optd-cli" } 22 | futures = "0.3" 23 | anyhow = { version = "1", features = ["backtrace"] } 24 | async-trait = "0.1" 25 | tokio = { 
version = "1.24", features = [ 26 | "macros", 27 | "rt", 28 | "rt-multi-thread", 29 | "sync", 30 | "parking_lot", 31 | ] } 32 | shlex = "1.3" 33 | tokio-postgres = "0.7" 34 | regex = "1.10" 35 | clap = { version = "4.5.4", features = ["derive"] } 36 | log = "0.4" 37 | env_logger = "0.11" 38 | lazy_static = "1.4.0" 39 | prettytable-rs = "0.10" 40 | serde = "1.0" 41 | serde_json = "1.0" 42 | test-case = "3.3" 43 | rayon = "1.10" 44 | parquet = "53.3.0" 45 | csv2parquet = { git = "https://github.com/skyzh/arrow-tools.git", branch = "main" } 46 | 47 | [dev-dependencies] 48 | assert_cmd = "2.0" 49 | -------------------------------------------------------------------------------- /optd-perfbench/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 5 | 6 | pub mod benchmark; 7 | pub mod cardbench; 8 | mod datafusion_dbms; 9 | pub mod job; 10 | mod postgres_dbms; 11 | pub mod shell; 12 | pub mod tpch; 13 | mod truecard; 14 | -------------------------------------------------------------------------------- /optd-perfbench/src/truecard.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use std::collections::HashMap; 7 | use std::fs::{self, File}; 8 | use std::path::{Path, PathBuf}; 9 | 10 | use async_trait::async_trait; 11 | 12 | use crate::benchmark::Benchmark; 13 | 14 | #[async_trait] 15 | pub trait TruecardGetter { 16 | async fn get_benchmark_truecards( 17 | &mut self, 18 | benchmark: &Benchmark, 19 | ) -> anyhow::Result>; 20 | } 21 | 22 | /// A cache that gets persisted to disk for the true cardinalities of all queries of all benchmarks 23 | pub struct TruecardCache { 24 | truecard_cache_fpath: PathBuf, 25 | cache: HashMap>, 26 | } 27 | 28 | impl TruecardCache { 29 | pub fn build>(truecard_cache_fpath: P) -> anyhow::Result { 30 | let truecard_cache_fpath = PathBuf::from(truecard_cache_fpath.as_ref()); 31 | let cache = if truecard_cache_fpath.exists() { 32 | let file = File::open(&truecard_cache_fpath)?; 33 | serde_json::from_reader(file)? 34 | } else { 35 | HashMap::new() 36 | }; 37 | 38 | Ok(Self { 39 | truecard_cache_fpath, 40 | cache, 41 | }) 42 | } 43 | 44 | pub fn insert_truecard( 45 | &mut self, 46 | data_and_queries_name: &str, 47 | query_id: &str, 48 | truecard: usize, 49 | ) { 50 | let db_cache = match self.cache.get_mut(data_and_queries_name) { 51 | Some(db_cache) => db_cache, 52 | None => { 53 | self.cache 54 | .insert(String::from(data_and_queries_name), HashMap::new()); 55 | self.cache.get_mut(data_and_queries_name).unwrap() 56 | } 57 | }; 58 | db_cache.insert(String::from(query_id), truecard); 59 | } 60 | 61 | pub fn get_truecard(&self, data_and_queries_name: &str, query_id: &str) -> Option { 62 | self.cache 63 | .get(data_and_queries_name) 64 | .and_then(|db_cache| db_cache.get(query_id).copied()) 65 | } 66 | 67 | pub fn save(&self) -> anyhow::Result<()> { 68 | fs::create_dir_all(self.truecard_cache_fpath.parent().unwrap())?; 69 | // this will create a new file or truncate the file if it already exists 70 | let file = File::create(&self.truecard_cache_fpath)?; 71 | serde_json::to_writer_pretty(file, &self.cache)?; 72 | 
Ok(()) 73 | } 74 | } 75 | 76 | impl Drop for TruecardCache { 77 | fn drop(&mut self) { 78 | self.save().unwrap(); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /optd-sqllogictest/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-sqllogictest" 3 | description = "sqllogictest for optd" 4 | version.workspace = true 5 | edition.workspace = true 6 | homepage.workspace = true 7 | keywords.workspace = true 8 | license.workspace = true 9 | repository.workspace = true 10 | 11 | [dependencies] 12 | thiserror = "2" 13 | sqllogictest = "0.22" 14 | clap = { version = "4.5.4", features = ["derive"] } 15 | anyhow = { version = "1", features = ["backtrace"] } 16 | async-trait = "0.1" 17 | datafusion-optd-cli = { path = "../datafusion-optd-cli", version = "43.0.0" } 18 | optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" } 19 | datafusion = { version = "43.0.0", features = [ 20 | "avro", 21 | "crypto_expressions", 22 | "encoding_expressions", 23 | "regex_expressions", 24 | "unicode_expressions", 25 | "compression", 26 | ] } 27 | env_logger = "0.9" 28 | mimalloc = { version = "0.1", default-features = false } 29 | regex = "1.8" 30 | tokio = { version = "1.24", features = [ 31 | "macros", 32 | "rt", 33 | "rt-multi-thread", 34 | "sync", 35 | "parking_lot", 36 | ] } 37 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } 38 | optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } 39 | itertools = "0.13" 40 | lazy_static = "1.4.0" 41 | 42 | [[test]] 43 | name = "harness" 44 | path = "./tests/harness.rs" 45 | harness = false 46 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/_basic_tables.slt.part: -------------------------------------------------------------------------------- 1 | statement ok 2 | create table 
t1(v1 int, v2 int); 3 | 4 | statement ok 5 | create table t2(v3 int, v4 int); 6 | 7 | statement ok 8 | insert into t1 values (1, 100), (2, 200), (2, 250), (3, 300), (3, 300); 9 | 10 | statement ok 11 | insert into t2 values (2, 200), (2, 250), (3, 300); 12 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/basic.slt: -------------------------------------------------------------------------------- 1 | query I 2 | select 1; 3 | ---- 4 | 1 5 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q1.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | l_returnflag, 6 | l_linestatus, 7 | sum(l_quantity) as sum_qty, 8 | sum(l_extendedprice) as sum_base_price, 9 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 10 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 11 | avg(l_quantity) as avg_qty, 12 | avg(l_extendedprice) as avg_price, 13 | avg(l_discount) as avg_disc, 14 | count(*) as count_order 15 | from 16 | lineitem 17 | where 18 | l_shipdate <= date '1998-12-01' - interval '71' day 19 | group by 20 | l_returnflag, 21 | l_linestatus 22 | order by 23 | l_returnflag, 24 | l_linestatus; 25 | ---- 26 | A F 37474.00 37569624.64 35676192.0970 37101416.222424 25.354533 25419.231826 0.050866 1478 27 | N F 1041.00 1041301.07 999060.8980 1036450.802280 27.394736 27402.659736 0.042894 38 28 | N O 75823.00 76040604.76 72270477.1588 75140545.284463 25.564059 25637.425745 0.049824 2966 29 | R F 36511.00 36570841.24 34738472.8758 36169060.112193 25.059025 25100.096938 0.050027 1457 30 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q11.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | 
ps_partkey, 6 | sum(ps_supplycost * ps_availqty) as value 7 | from 8 | partsupp, 9 | supplier, 10 | nation 11 | where 12 | ps_suppkey = s_suppkey 13 | and s_nationkey = n_nationkey 14 | and n_name = 'GERMANY' 15 | group by 16 | ps_partkey having 17 | sum(ps_supplycost * ps_availqty) > ( 18 | select 19 | sum(ps_supplycost * ps_availqty) * 0.0001000000 20 | from 21 | partsupp, 22 | supplier, 23 | nation 24 | where 25 | ps_suppkey = s_suppkey 26 | and s_nationkey = n_nationkey 27 | and n_name = 'GERMANY' 28 | ) 29 | order by 30 | value desc; 31 | ---- 32 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q12.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | l_shipmode, 6 | sum(case 7 | when o_orderpriority = '1-URGENT' 8 | or o_orderpriority = '2-HIGH' 9 | then 1 10 | else 0 11 | end) as high_line_count, 12 | sum(case 13 | when o_orderpriority <> '1-URGENT' 14 | and o_orderpriority <> '2-HIGH' 15 | then 1 16 | else 0 17 | end) as low_line_count 18 | from 19 | orders, 20 | lineitem 21 | where 22 | o_orderkey = l_orderkey 23 | and l_shipmode in ('MAIL', 'SHIP') 24 | and l_commitdate < l_receiptdate 25 | and l_shipdate < l_commitdate 26 | and l_receiptdate >= date '1994-01-01' 27 | and l_receiptdate < date '1994-01-01' + interval '1' year 28 | group by 29 | l_shipmode 30 | order by 31 | l_shipmode; 32 | ---- 33 | MAIL 5 5 34 | SHIP 5 10 35 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q13.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | c_count, 6 | count(*) as custdist 7 | from 8 | ( 9 | select 10 | c_custkey, 11 | count(o_orderkey) 12 | from 13 | customer left outer join orders on 14 | c_custkey = o_custkey 15 | and o_comment not like 
'%special%requests%' 16 | group by 17 | c_custkey 18 | ) as c_orders (c_custkey, c_count) 19 | group by 20 | c_count 21 | order by 22 | custdist desc, 23 | c_count desc; 24 | ---- 25 | 0 50 26 | 16 8 27 | 17 7 28 | 20 6 29 | 13 6 30 | 12 6 31 | 9 6 32 | 23 5 33 | 14 5 34 | 10 5 35 | 21 4 36 | 18 4 37 | 11 4 38 | 8 4 39 | 7 4 40 | 26 3 41 | 22 3 42 | 6 3 43 | 5 3 44 | 4 3 45 | 29 2 46 | 24 2 47 | 19 2 48 | 15 2 49 | 28 1 50 | 25 1 51 | 3 1 52 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q14.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | 100.00 * sum(case 6 | when p_type like 'PROMO%' 7 | then l_extendedprice * (1 - l_discount) 8 | else 0 9 | end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 10 | from 11 | lineitem, 12 | part 13 | where 14 | l_partkey = p_partkey 15 | and l_shipdate >= date '1995-09-01' 16 | and l_shipdate < date '1995-09-01' + interval '1' month; 17 | ---- 18 | 15.23021261159725 19 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q15.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | statement ok 4 | create view revenue0 (supplier_no, total_revenue) as 5 | select 6 | l_suppkey, 7 | sum(l_extendedprice * (1 - l_discount)) 8 | from 9 | lineitem 10 | where 11 | l_shipdate >= date '1996-01-01' 12 | and l_shipdate < date '1996-01-01' + interval '3' month 13 | group by 14 | l_suppkey; 15 | 16 | query 17 | select 18 | s_suppkey, 19 | s_name, 20 | s_address, 21 | s_phone, 22 | total_revenue 23 | from 24 | supplier, 25 | revenue0 26 | where 27 | s_suppkey = supplier_no 28 | and total_revenue = ( 29 | select 30 | max(total_revenue) 31 | from 32 | revenue0 33 | ) 34 | order by 35 | s_suppkey; 36 | ---- 37 | 10 Supplier#000000010 Saygah3gYWMp72i 
PY 34-852-489-8585 797313.3838 38 | 39 | statement ok 40 | drop view revenue0; 41 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q16.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | p_brand, 6 | p_type, 7 | p_size, 8 | count(distinct ps_suppkey) as supplier_cnt 9 | from 10 | partsupp, 11 | part 12 | where 13 | p_partkey = ps_partkey 14 | and p_brand <> 'Brand#45' 15 | and p_type not like 'MEDIUM POLISHED%' 16 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9) 17 | and ps_suppkey not in ( 18 | select 19 | s_suppkey 20 | from 21 | supplier 22 | where 23 | s_comment like '%Customer%Complaints%' 24 | ) 25 | group by 26 | p_brand, 27 | p_type, 28 | p_size 29 | order by 30 | supplier_cnt desc, 31 | p_brand, 32 | p_type, 33 | p_size; 34 | ---- 35 | Brand#11 PROMO ANODIZED TIN 45 4 36 | Brand#11 SMALL PLATED COPPER 45 4 37 | Brand#11 STANDARD POLISHED TIN 45 4 38 | Brand#13 MEDIUM ANODIZED STEEL 36 4 39 | Brand#13 SMALL BRUSHED NICKEL 19 4 40 | Brand#14 SMALL ANODIZED NICKEL 45 4 41 | Brand#15 LARGE ANODIZED BRASS 45 4 42 | Brand#21 LARGE BURNISHED COPPER 19 4 43 | Brand#23 ECONOMY BRUSHED COPPER 9 4 44 | Brand#24 MEDIUM PLATED STEEL 19 4 45 | Brand#25 MEDIUM PLATED BRASS 45 4 46 | Brand#25 SMALL BURNISHED COPPER 3 4 47 | Brand#31 ECONOMY PLATED STEEL 23 4 48 | Brand#31 PROMO POLISHED TIN 23 4 49 | Brand#32 MEDIUM BURNISHED BRASS 49 4 50 | Brand#33 LARGE BRUSHED TIN 36 4 51 | Brand#33 SMALL BURNISHED NICKEL 3 4 52 | Brand#34 LARGE PLATED BRASS 45 4 53 | Brand#34 MEDIUM BRUSHED COPPER 9 4 54 | Brand#34 SMALL PLATED BRASS 14 4 55 | Brand#35 STANDARD ANODIZED STEEL 23 4 56 | Brand#43 MEDIUM ANODIZED BRASS 14 4 57 | Brand#43 PROMO POLISHED BRASS 19 4 58 | Brand#43 SMALL BRUSHED NICKEL 9 4 59 | Brand#44 SMALL PLATED COPPER 19 4 60 | Brand#51 ECONOMY POLISHED STEEL 49 4 61 | Brand#52 MEDIUM BURNISHED TIN 45 4 62 | 
Brand#52 SMALL BURNISHED NICKEL 14 4 63 | Brand#53 LARGE BURNISHED NICKEL 23 4 64 | Brand#53 MEDIUM BRUSHED COPPER 3 4 65 | Brand#53 STANDARD PLATED STEEL 45 4 66 | Brand#54 ECONOMY ANODIZED BRASS 9 4 67 | Brand#55 STANDARD ANODIZED BRASS 36 4 68 | Brand#55 STANDARD BRUSHED COPPER 3 4 69 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q17.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | sum(l_extendedprice) / 7.0 as avg_yearly 6 | from 7 | lineitem, 8 | part 9 | where 10 | p_partkey = l_partkey 11 | and p_brand = 'Brand#53' -- original: Brand#23 12 | and p_container = 'MED BOX' 13 | and l_quantity < ( 14 | select 15 | 0.2 * avg(l_quantity) 16 | from 17 | lineitem 18 | where 19 | l_partkey = p_partkey 20 | ); 21 | ---- 22 | 863.2285714285715 23 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q18.slt.disabled: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | c_name, 6 | c_custkey, 7 | o_orderkey, 8 | o_orderdate, 9 | o_totalprice, 10 | sum(l_quantity) 11 | from 12 | customer, 13 | orders, 14 | lineitem 15 | where 16 | o_orderkey in ( 17 | select 18 | l_orderkey 19 | from 20 | lineitem 21 | group by 22 | l_orderkey having 23 | sum(l_quantity) > 250 -- original: 300 24 | ) 25 | and c_custkey = o_custkey 26 | and o_orderkey = l_orderkey 27 | group by 28 | c_name, 29 | c_custkey, 30 | o_orderkey, 31 | o_orderdate, 32 | o_totalprice 33 | order by 34 | o_totalprice desc, 35 | o_orderdate 36 | limit 100; 37 | ---- 38 | Customer#000000070 70 2567 1998-02-27 263411.29 266.00 39 | Customer#000000010 10 4421 1997-04-04 258779.02 255.00 40 | Customer#000000082 82 3460 1995-10-03 245976.74 254.00 41 | Customer#000000068 68 2208 1995-05-01 245388.06 256.00 42 | 
-------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q19.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | sum(l_extendedprice* (1 - l_discount)) as revenue 6 | from 7 | lineitem, 8 | part 9 | where 10 | ( 11 | p_partkey = l_partkey 12 | and p_brand = 'Brand#12' 13 | and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 14 | and l_quantity >= 1 and l_quantity <= 1 + 10 15 | and p_size between 1 and 5 16 | and l_shipmode in ('AIR', 'AIR REG') 17 | and l_shipinstruct = 'DELIVER IN PERSON' 18 | ) 19 | or 20 | ( 21 | p_partkey = l_partkey 22 | and p_brand = 'Brand#23' 23 | and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 24 | and l_quantity >= 10 and l_quantity <= 10 + 10 25 | and p_size between 1 and 10 26 | and l_shipmode in ('AIR', 'AIR REG') 27 | and l_shipinstruct = 'DELIVER IN PERSON' 28 | ) 29 | or 30 | ( 31 | p_partkey = l_partkey 32 | and p_brand = 'Brand#33' 33 | and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 34 | and l_quantity >= 20 and l_quantity <= 20 + 10 35 | and p_size between 1 and 15 36 | and l_shipmode in ('AIR', 'AIR REG') 37 | and l_shipinstruct = 'DELIVER IN PERSON' 38 | ); 39 | ---- 40 | 24521.1300 41 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q2.slt.disabled: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | s_acctbal, 6 | s_name, 7 | n_name, 8 | p_partkey, 9 | p_mfgr, 10 | s_address, 11 | s_phone, 12 | s_comment 13 | from 14 | part, 15 | supplier, 16 | partsupp, 17 | nation, 18 | region 19 | where 20 | p_partkey = ps_partkey 21 | and s_suppkey = ps_suppkey 22 | and p_size = 1 23 | and p_type like '%TIN' 24 | and s_nationkey = n_nationkey 25 | and n_regionkey = r_regionkey 26 | and r_name = 
'AFRICA' 27 | and ps_supplycost = ( 28 | select 29 | min(ps_supplycost) 30 | from 31 | partsupp, 32 | supplier, 33 | nation, 34 | region 35 | where 36 | p_partkey = ps_partkey 37 | and s_suppkey = ps_suppkey 38 | and s_nationkey = n_nationkey 39 | and n_regionkey = r_regionkey 40 | and r_name = 'AFRICA' 41 | ) 42 | order by 43 | s_acctbal desc, 44 | n_name, 45 | s_name, 46 | p_partkey; 47 | ---- 48 | 1365.79 Supplier#000000006 KENYA 154 Manufacturer#1 tQxuVm7s7CnK 24-696-997-4969 final accounts. regular dolphins use against the furiously ironic decoys. 49 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q20.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | s_name, 6 | s_address 7 | from 8 | supplier, 9 | nation 10 | where 11 | s_suppkey in ( 12 | select 13 | ps_suppkey 14 | from 15 | partsupp 16 | where 17 | ps_partkey in ( 18 | select 19 | p_partkey 20 | from 21 | part 22 | where 23 | p_name like 'indian%' 24 | ) 25 | and ps_availqty > ( 26 | select 27 | 0.5 * sum(l_quantity) 28 | from 29 | lineitem 30 | where 31 | l_partkey = ps_partkey 32 | and l_suppkey = ps_suppkey 33 | and l_shipdate >= date '1996-01-01' 34 | and l_shipdate < date '1996-01-01' + interval '1' year 35 | ) 36 | ) 37 | and s_nationkey = n_nationkey 38 | and n_name = 'IRAQ' 39 | order by 40 | s_name; 41 | ---- 42 | Supplier#000000005 Gcdm2rJRzl5qlTVzc 43 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q21.slt.disabled: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | s_name, 6 | count(*) as numwait 7 | from 8 | supplier, 9 | lineitem l1, 10 | orders, 11 | nation 12 | where 13 | s_suppkey = l1.l_suppkey 14 | and o_orderkey = l1.l_orderkey 15 | and o_orderstatus = 'F' 16 | and 
l1.l_receiptdate > l1.l_commitdate 17 | and exists ( 18 | select 19 | * 20 | from 21 | lineitem l2 22 | where 23 | l2.l_orderkey = l1.l_orderkey 24 | and l2.l_suppkey <> l1.l_suppkey 25 | ) 26 | and not exists ( 27 | select 28 | * 29 | from 30 | lineitem l3 31 | where 32 | l3.l_orderkey = l1.l_orderkey 33 | and l3.l_suppkey <> l1.l_suppkey 34 | and l3.l_receiptdate > l3.l_commitdate 35 | ) 36 | and s_nationkey = n_nationkey 37 | and n_name = 'SAUDI ARABIA' 38 | group by 39 | s_name 40 | order by 41 | numwait desc, 42 | s_name 43 | limit 100; 44 | ---- 45 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q22.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | cntrycode, 6 | count(*) as numcust, 7 | sum(c_acctbal) as totacctbal 8 | from 9 | ( 10 | select 11 | substring(c_phone from 1 for 2) as cntrycode, 12 | c_acctbal 13 | from 14 | customer 15 | where 16 | substring(c_phone from 1 for 2) in 17 | ('13', '31', '23', '29', '30', '18', '17') 18 | and c_acctbal > ( 19 | select 20 | avg(c_acctbal) 21 | from 22 | customer 23 | where 24 | c_acctbal > 0.00 25 | and substring(c_phone from 1 for 2) in 26 | ('13', '31', '23', '29', '30', '18', '17') 27 | ) 28 | and not exists ( 29 | select 30 | * 31 | from 32 | orders 33 | where 34 | o_custkey = c_custkey 35 | ) 36 | ) as custsale 37 | group by 38 | cntrycode 39 | order by 40 | cntrycode; 41 | ---- 42 | 13 1 5679.84 43 | 17 1 9127.27 44 | 18 2 14647.99 45 | 23 1 9255.67 46 | 29 2 17195.08 47 | 30 1 7638.57 48 | 31 1 9331.13 49 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q3.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | l_orderkey, 6 | sum(l_extendedprice * (1 - l_discount)) as revenue, 7 | o_orderdate, 8 | 
o_shippriority 9 | from 10 | customer, 11 | orders, 12 | lineitem 13 | where 14 | c_mktsegment = 'BUILDING' 15 | and c_custkey = o_custkey 16 | and l_orderkey = o_orderkey 17 | and o_orderdate < date '1995-03-15' 18 | and l_shipdate > date '1995-03-15' 19 | group by 20 | l_orderkey, 21 | o_orderdate, 22 | o_shippriority 23 | order by 24 | revenue desc, 25 | o_orderdate 26 | limit 10; 27 | ---- 28 | 1637 164224.9253 1995-02-08 0 29 | 5191 49378.3094 1994-12-11 0 30 | 742 43728.0480 1994-12-23 0 31 | 3492 43716.0724 1994-11-24 0 32 | 2883 36666.9612 1995-01-23 0 33 | 998 11785.5486 1994-11-26 0 34 | 3430 4726.6775 1994-12-12 0 35 | 4423 3055.9365 1995-02-17 0 36 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q4.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | o_orderpriority, 6 | count(*) as order_count 7 | from 8 | orders 9 | where 10 | o_orderdate >= date '1993-07-01' 11 | and o_orderdate < date '1993-07-01' + interval '3' month 12 | and exists ( 13 | select 14 | * 15 | from 16 | lineitem 17 | where 18 | l_orderkey = o_orderkey 19 | and l_commitdate < l_receiptdate 20 | ) 21 | group by 22 | o_orderpriority 23 | order by 24 | o_orderpriority; 25 | ---- 26 | 1-URGENT 9 27 | 2-HIGH 7 28 | 3-MEDIUM 9 29 | 4-NOT SPECIFIED 8 30 | 5-LOW 12 31 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q5.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | n_name, 6 | sum(l_extendedprice * (1 - l_discount)) as revenue 7 | from 8 | customer, 9 | orders, 10 | lineitem, 11 | supplier, 12 | nation, 13 | region 14 | where 15 | c_custkey = o_custkey 16 | and l_orderkey = o_orderkey 17 | and l_suppkey = s_suppkey 18 | and c_nationkey = s_nationkey 19 | and s_nationkey = 
n_nationkey 20 | and n_regionkey = r_regionkey 21 | and r_name = 'AFRICA' 22 | and o_orderdate >= date '1994-01-01' 23 | and o_orderdate < date '1994-01-01' + interval '1' year 24 | group by 25 | n_name 26 | order by 27 | revenue desc; 28 | ---- 29 | MOROCCO 220457.0142 30 | ETHIOPIA 115183.8546 31 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q6.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | sum(l_extendedprice * l_discount) as revenue 6 | from 7 | lineitem 8 | where 9 | l_shipdate >= date '1994-01-01' 10 | and l_shipdate < date '1994-01-01' + interval '1' year 11 | and l_discount between 0.08 - 0.01 and 0.08 + 0.01 12 | and l_quantity < 24; 13 | ---- 14 | 90927.6243 15 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q7.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | supp_nation, 6 | cust_nation, 7 | l_year, 8 | sum(volume) as revenue 9 | from 10 | ( 11 | select 12 | n1.n_name as supp_nation, 13 | n2.n_name as cust_nation, 14 | extract(year from l_shipdate) as l_year, 15 | l_extendedprice * (1 - l_discount) as volume 16 | from 17 | supplier, 18 | lineitem, 19 | orders, 20 | customer, 21 | nation n1, 22 | nation n2 23 | where 24 | s_suppkey = l_suppkey 25 | and o_orderkey = l_orderkey 26 | and c_custkey = o_custkey 27 | and s_nationkey = n1.n_nationkey 28 | and c_nationkey = n2.n_nationkey 29 | and ( 30 | (n1.n_name = 'UNITED STATES' and n2.n_name = 'CHINA') 31 | or (n1.n_name = 'CHINA' and n2.n_name = 'UNITED STATES') 32 | ) 33 | and l_shipdate between date '1995-01-01' and date '1996-12-31' 34 | ) as shipping 35 | group by 36 | supp_nation, 37 | cust_nation, 38 | l_year 39 | order by 40 | supp_nation, 41 | cust_nation, 42 | l_year; 43 | 
---- 44 | UNITED STATES CHINA 1995.0 130212.3261 45 | UNITED STATES CHINA 1996.0 195468.6891 46 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q8.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | o_year, 6 | sum(case 7 | when nation = 'IRAQ' then volume 8 | else 0 9 | end) / sum(volume) as mkt_share 10 | from 11 | ( 12 | select 13 | extract(year from o_orderdate) as o_year, 14 | l_extendedprice * (1 - l_discount) as volume, 15 | n2.n_name as nation 16 | from 17 | part, 18 | supplier, 19 | lineitem, 20 | orders, 21 | customer, 22 | nation n1, 23 | nation n2, 24 | region 25 | where 26 | p_partkey = l_partkey 27 | and s_suppkey = l_suppkey 28 | and l_orderkey = o_orderkey 29 | and o_custkey = c_custkey 30 | and c_nationkey = n1.n_nationkey 31 | and n1.n_regionkey = r_regionkey 32 | and r_name = 'AMERICA' 33 | and s_nationkey = n2.n_nationkey 34 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 35 | and p_type = 'ECONOMY ANODIZED STEEL' 36 | ) as all_nations 37 | group by 38 | o_year 39 | order by 40 | o_year; 41 | ---- 42 | 1995.0 1.00000000 43 | 1996.0 0.32989690 44 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/tpch-q9.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | select 5 | nation, 6 | o_year, 7 | sum(amount) as sum_profit 8 | from 9 | ( 10 | select 11 | n_name as nation, 12 | extract(year from o_orderdate) as o_year, 13 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 14 | from 15 | part, 16 | supplier, 17 | lineitem, 18 | partsupp, 19 | orders, 20 | nation 21 | where 22 | s_suppkey = l_suppkey 23 | and ps_suppkey = l_suppkey 24 | and ps_partkey = l_partkey 25 | and p_partkey = l_partkey 26 | and o_orderkey = 
l_orderkey 27 | and s_nationkey = n_nationkey 28 | and p_name like '%green%' 29 | ) as profit 30 | group by 31 | nation, 32 | o_year 33 | order by 34 | nation, 35 | o_year desc; 36 | ---- 37 | ARGENTINA 1998.0 17779.0697 38 | ARGENTINA 1997.0 13943.9538 39 | ARGENTINA 1996.0 7641.4227 40 | ARGENTINA 1995.0 20892.7525 41 | ARGENTINA 1994.0 15088.3526 42 | ARGENTINA 1993.0 17586.3446 43 | ARGENTINA 1992.0 28732.4615 44 | ETHIOPIA 1998.0 28217.1600 45 | ETHIOPIA 1996.0 33970.6500 46 | ETHIOPIA 1995.0 37720.3500 47 | ETHIOPIA 1994.0 37251.0100 48 | ETHIOPIA 1993.0 23782.6100 49 | IRAN 1997.0 23590.0080 50 | IRAN 1996.0 7428.2325 51 | IRAN 1995.0 21000.9965 52 | IRAN 1994.0 29408.1300 53 | IRAN 1993.0 49876.4150 54 | IRAN 1992.0 52064.2400 55 | IRAQ 1998.0 11619.9604 56 | IRAQ 1997.0 47910.2460 57 | IRAQ 1996.0 18459.5675 58 | IRAQ 1995.0 32782.3701 59 | IRAQ 1994.0 9041.2317 60 | IRAQ 1993.0 30687.2625 61 | IRAQ 1992.0 29098.2557 62 | KENYA 1998.0 33148.3345 63 | KENYA 1997.0 54355.0165 64 | KENYA 1996.0 53607.4854 65 | KENYA 1995.0 85354.8738 66 | KENYA 1994.0 102904.2511 67 | KENYA 1993.0 109310.8084 68 | KENYA 1992.0 138534.1210 69 | MOROCCO 1998.0 157058.2328 70 | MOROCCO 1997.0 88669.9610 71 | MOROCCO 1996.0 236833.6672 72 | MOROCCO 1995.0 381575.8668 73 | MOROCCO 1994.0 243523.4336 74 | MOROCCO 1993.0 232196.7803 75 | MOROCCO 1992.0 347434.1452 76 | PERU 1998.0 101109.0196 77 | PERU 1997.0 58073.0866 78 | PERU 1996.0 30360.5218 79 | PERU 1995.0 138451.7800 80 | PERU 1994.0 55023.0632 81 | PERU 1993.0 110409.0863 82 | PERU 1992.0 70946.1916 83 | UNITED KINGDOM 1998.0 139685.0440 84 | UNITED KINGDOM 1997.0 183502.0498 85 | UNITED KINGDOM 1996.0 374085.2884 86 | UNITED KINGDOM 1995.0 548356.7984 87 | UNITED KINGDOM 1994.0 266982.7680 88 | UNITED KINGDOM 1993.0 717309.4640 89 | UNITED KINGDOM 1992.0 79540.6016 90 | UNITED STATES 1998.0 32847.9600 91 | UNITED STATES 1997.0 30849.5000 92 | UNITED STATES 1996.0 56125.4600 93 | UNITED STATES 1995.0 15961.7977 94 | UNITED 
STATES 1994.0 31671.2000 95 | UNITED STATES 1993.0 55057.4690 96 | UNITED STATES 1992.0 51970.2300 97 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-agg-nulls.slt: -------------------------------------------------------------------------------- 1 | include _basic_tables.slt.part 2 | 3 | # This query has NULL values from the subquery agg. It won't work without the 4 | # outer join fix. 5 | # It also has an out-of-order extern column [#1] 6 | query 7 | select 8 | v1, 9 | v2, 10 | ( 11 | select avg(v4) 12 | from t2 13 | where v4 = v2 14 | ) as avg_v4 15 | from t1 order by v1; 16 | ---- 17 | 1 100 NULL 18 | 2 200 200.0 19 | 2 250 250.0 20 | 3 300 300.0 21 | 3 300 300.0 22 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-count-star.slt: -------------------------------------------------------------------------------- 1 | include _basic_tables.slt.part 2 | 3 | # This query uses a count(*) agg function, with nulls. Nulls should be 4 | # transformed from NULL to 0 when they come from count(*). 5 | # It won't work without the outer join fix + a special case on count(*). 
6 | # It also has an out-of-order extern column [#1] 7 | query 8 | select 9 | v1, 10 | v2, 11 | ( 12 | select count(*) 13 | from t2 14 | where v4 = v2 15 | ) as avg_v4 16 | from t1 order by v1; 17 | ---- 18 | 1 100 0 19 | 2 200 1 20 | 2 250 1 21 | 3 300 1 22 | 3 300 1 23 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-dup.slt: -------------------------------------------------------------------------------- 1 | include _basic_tables.slt.part 2 | 3 | query 4 | select * from t1 where (select sum(v4) from t2 where v3 = v1) > 100; 5 | ---- 6 | 2 200 7 | 2 250 8 | 3 300 9 | 3 300 10 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-exists-2.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT 5 | c.c_custkey, 6 | c.c_name 7 | FROM 8 | customer c 9 | WHERE 10 | EXISTS ( 11 | SELECT 1 12 | FROM orders o 13 | WHERE o.o_custkey = c.c_custkey 14 | AND o.o_orderstatus = 'O' 15 | AND o.o_orderdate > '1998-08-01' 16 | ) 17 | AND NOT EXISTS ( 18 | SELECT 1 19 | FROM orders o 20 | JOIN lineitem l ON o.o_orderkey = l.l_orderkey 21 | WHERE o.o_custkey = c.c_custkey 22 | AND o.o_orderstatus = 'R' 23 | AND o.o_orderdate > '1998-08-01' 24 | AND o.o_totalprice > 5000 25 | ); 26 | ---- 27 | 88 Customer#000000088 28 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-exists-uncor.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c_name 5 | FROM customer c 6 | WHERE c_nationkey IN ( 7 | SELECT n_nationkey 8 | FROM nation 9 | WHERE n_name = 'GERMANY' 10 | ); 11 | ---- 12 | Customer#000000062 13 | Customer#000000071 14 | Customer#000000093 15 | Customer#000000119 16 | Customer#000000129 17 | Customer#000000136 18 | 
-------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-exists.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT 5 | c_custkey, 6 | c_name 7 | FROM 8 | customer c 9 | WHERE 10 | EXISTS ( 11 | SELECT 1 12 | FROM orders o 13 | WHERE o.o_custkey = c.c_custkey 14 | AND o.o_orderstatus = 'O' 15 | AND o.o_orderdate > '1998-08-01' 16 | ); 17 | ---- 18 | 88 Customer#000000088 19 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-extern-out-of-order.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | # A query with a correlated subquery that retrieves columns out of order 4 | # i.e. the extern columns are not of the format [#0, #1, ...] 5 | # This query has extern columns [#1] 6 | query 7 | select 8 | l_orderkey, 9 | l_partkey, 10 | l_extendedprice, 11 | ( 12 | select avg(p_size) 13 | from part 14 | where p_partkey = l_partkey 15 | ) as avg_extendedprice 16 | from lineitem 17 | where l_extendedprice > 55000; 18 | ---- 19 | 1121 200 55010.00 22.0 20 | 4931 200 55010.00 22.0 21 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-in-exists.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c.c_custkey, c.c_name 5 | FROM customer c 6 | WHERE c.c_custkey IN ( 7 | SELECT o.o_custkey 8 | FROM orders o 9 | WHERE o.o_custkey = c.c_custkey AND o.o_totalprice > 250000 10 | ) 11 | AND EXISTS ( 12 | SELECT 1 13 | FROM orders o 14 | WHERE o.o_custkey = c.c_custkey 15 | AND o.o_orderstatus = 'O' 16 | ) 17 | order by c.c_custkey; 18 | ---- 19 | 10 Customer#000000010 20 | 70 Customer#000000070 21 | 
-------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-in-uncor.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c.c_custkey, c.c_name 5 | FROM customer c 6 | WHERE c.c_custkey IN ( 7 | SELECT o.o_custkey 8 | FROM orders o 9 | WHERE o.o_totalprice > 250000 10 | ) order by c.c_custkey; 11 | ---- 12 | 10 Customer#000000010 13 | 70 Customer#000000070 14 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-in.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c.c_custkey, c.c_name 5 | FROM customer c 6 | WHERE c.c_custkey IN ( 7 | SELECT o.o_custkey 8 | FROM orders o 9 | WHERE o.o_custkey = c.c_custkey AND o.o_totalprice > 250000 10 | ) 11 | ORDER BY c.c_custkey; 12 | ---- 13 | 10 Customer#000000010 14 | 70 Customer#000000070 15 | -------------------------------------------------------------------------------- /optd-sqllogictest/slt/unnest-not-in-uncor.slt: -------------------------------------------------------------------------------- 1 | include _tpch_tables.slt.part 2 | 3 | query 4 | SELECT c.c_custkey, c.c_name 5 | FROM customer c 6 | WHERE c.c_custkey NOT IN ( 7 | SELECT o.o_custkey 8 | FROM orders o 9 | WHERE o.o_orderstatus = 'O' 10 | ) order by c.c_custkey; 11 | ---- 12 | 3 Customer#000000003 13 | 6 Customer#000000006 14 | 9 Customer#000000009 15 | 12 Customer#000000012 16 | 15 Customer#000000015 17 | 18 Customer#000000018 18 | 21 Customer#000000021 19 | 24 Customer#000000024 20 | 27 Customer#000000027 21 | 30 Customer#000000030 22 | 33 Customer#000000033 23 | 36 Customer#000000036 24 | 39 Customer#000000039 25 | 42 Customer#000000042 26 | 45 Customer#000000045 27 | 48 Customer#000000048 28 | 51 Customer#000000051 29 | 54 Customer#000000054 30 | 57 
Customer#000000057 31 | 60 Customer#000000060 32 | 63 Customer#000000063 33 | 66 Customer#000000066 34 | 69 Customer#000000069 35 | 72 Customer#000000072 36 | 75 Customer#000000075 37 | 78 Customer#000000078 38 | 81 Customer#000000081 39 | 84 Customer#000000084 40 | 87 Customer#000000087 41 | 90 Customer#000000090 42 | 93 Customer#000000093 43 | 96 Customer#000000096 44 | 99 Customer#000000099 45 | 102 Customer#000000102 46 | 105 Customer#000000105 47 | 108 Customer#000000108 48 | 111 Customer#000000111 49 | 114 Customer#000000114 50 | 117 Customer#000000117 51 | 120 Customer#000000120 52 | 123 Customer#000000123 53 | 126 Customer#000000126 54 | 129 Customer#000000129 55 | 132 Customer#000000132 56 | 135 Customer#000000135 57 | 138 Customer#000000138 58 | 141 Customer#000000141 59 | 144 Customer#000000144 60 | 147 Customer#000000147 61 | 150 Customer#000000150 62 | -------------------------------------------------------------------------------- /optd-sqllogictest/tests/harness.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use std::path::Path; 7 | 8 | use optd_sqllogictest::DatafusionDBMS; 9 | use sqllogictest::{harness::Failed, Runner}; 10 | use tokio::runtime::Runtime; 11 | 12 | // TODO: sqllogictest harness should support async new function 13 | 14 | fn main() { 15 | let paths = sqllogictest::harness::glob("slt/**/*.slt").expect("failed to find test files"); 16 | let mut tests = vec![]; 17 | 18 | for entry in paths { 19 | let path = entry.expect("failed to read glob entry"); 20 | tests.push(sqllogictest::harness::Trial::test( 21 | path.to_str().unwrap().to_string(), 22 | move || test(&path), 23 | )); 24 | } 25 | 26 | if tests.is_empty() { 27 | panic!("no test found for sqllogictest under: slt/**/*.slt"); 28 | } 29 | 30 | sqllogictest::harness::run(&sqllogictest::harness::Arguments::from_args(), tests).exit(); 31 | } 32 | 33 | fn build_runtime() -> Runtime { 34 | tokio::runtime::Builder::new_current_thread() 35 | .enable_all() 36 | .build() 37 | .unwrap() 38 | } 39 | 40 | fn test(filename: impl AsRef) -> Result<(), Failed> { 41 | build_runtime().block_on(async { 42 | // let mut tester = Runner::new(|| async { Ok(DatafusionDBMS::new_no_optd().await?) 
}); 43 | let mut tester = Runner::new(|| async { DatafusionDBMS::new().await }); 44 | tester.run_file_async(filename).await?; 45 | Ok(()) 46 | }) 47 | } 48 | -------------------------------------------------------------------------------- /optd-sqlplannertest/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-sqlplannertest" 3 | description = "sqlplannertest for optd" 4 | version = { workspace = true } 5 | edition = { workspace = true } 6 | homepage = { workspace = true } 7 | keywords = { workspace = true } 8 | license = { workspace = true } 9 | repository = { workspace = true } 10 | 11 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 12 | 13 | [dependencies] 14 | clap = { version = "4.5.4", features = ["derive"] } 15 | anyhow = { version = "1", features = ["backtrace"] } 16 | sqlplannertest = "0.4.1" 17 | async-trait = "0.1" 18 | datafusion-optd-cli = { path = "../datafusion-optd-cli", version = "43.0.0" } 19 | optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" } 20 | datafusion = { version = "43.0.0", features = [ 21 | "avro", 22 | "crypto_expressions", 23 | "encoding_expressions", 24 | "regex_expressions", 25 | "unicode_expressions", 26 | "compression", 27 | ] } 28 | mimalloc = { version = "0.1", default-features = false } 29 | regex = "1.8" 30 | tokio = { version = "1.24", features = [ 31 | "macros", 32 | "rt", 33 | "rt-multi-thread", 34 | "sync", 35 | "parking_lot", 36 | ] } 37 | optd-datafusion-bridge = { path = "../optd-datafusion-bridge", version = "0.1" } 38 | optd-datafusion-repr = { path = "../optd-datafusion-repr", version = "0.1" } 39 | itertools = "0.13" 40 | lazy_static = "1.4.0" 41 | tracing-subscriber = { version = "0.3", features = ["env-filter"] } 42 | backtrace-on-stack-overflow = "0.3" 43 | 44 | [dev-dependencies] 45 | criterion = { version = "0.5.1", features = ["async_tokio"] } 46 
| serde_yaml = "0.9" 47 | 48 | [[test]] 49 | name = "planner_test" 50 | harness = false 51 | 52 | [[bench]] 53 | name = "planner_bench" 54 | harness = false 55 | -------------------------------------------------------------------------------- /optd-sqlplannertest/src/bench_helper.rs: -------------------------------------------------------------------------------- 1 | pub mod execution; 2 | pub mod planning; 3 | 4 | use std::future::Future; 5 | 6 | use crate::TestFlags; 7 | use anyhow::Result; 8 | use tokio::runtime::Runtime; 9 | 10 | pub use execution::ExecutionBenchRunner; 11 | pub use planning::PlanningBenchRunner; 12 | 13 | pub trait PlannerBenchRunner { 14 | /// Describes what the benchmark is evaluating. 15 | const BENCH_NAME: &str; 16 | /// Benchmark's input. 17 | type BenchInput; 18 | 19 | /// Setups the necessary environment for the benchmark based on the test case. 20 | /// Returns the input needed for the benchmark. 21 | fn setup( 22 | &mut self, 23 | test_case: &sqlplannertest::ParsedTestCase, 24 | ) -> impl std::future::Future> + Send; 25 | 26 | /// Runs the actual benchmark based on the test case and input. 
27 | fn bench( 28 | self, 29 | input: Self::BenchInput, 30 | test_case: &sqlplannertest::ParsedTestCase, 31 | flags: &TestFlags, 32 | ) -> impl std::future::Future> + Send; 33 | } 34 | 35 | /// Sync wrapper for [`PlannerBenchRunner::setup`] 36 | pub fn bench_setup( 37 | runtime: &Runtime, 38 | runner_fn: F, 39 | testcase: &sqlplannertest::ParsedTestCase, 40 | ) -> (R, R::BenchInput, TestFlags) 41 | where 42 | F: Fn() -> Ft + Send + Sync + 'static + Clone, 43 | Ft: Future> + Send, 44 | R: PlannerBenchRunner, 45 | { 46 | runtime.block_on(async { 47 | let mut runner = runner_fn().await.unwrap(); 48 | let (input, flags) = runner.setup(testcase).await.unwrap(); 49 | (runner, input, flags) 50 | }) 51 | } 52 | 53 | /// Sync wrapper for [`PlannerBenchRunner::bench`] 54 | pub fn bench_run( 55 | runtime: &Runtime, 56 | runner: R, 57 | input: R::BenchInput, 58 | testcase: &sqlplannertest::ParsedTestCase, 59 | flags: &TestFlags, 60 | ) where 61 | R: PlannerBenchRunner, 62 | { 63 | runtime.block_on(async { runner.bench(input, testcase, flags).await.unwrap() }); 64 | } 65 | -------------------------------------------------------------------------------- /optd-sqlplannertest/src/bench_helper/execution.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::{extract_flags, DatafusionDBMS, TestFlags}; 4 | use anyhow::Result; 5 | use datafusion::{execution::TaskContext, physical_plan::ExecutionPlan}; 6 | 7 | use super::PlannerBenchRunner; 8 | 9 | /// A benchmark runner for evaluating execution time of optimized plan. 10 | pub struct ExecutionBenchRunner { 11 | pub dbms: DatafusionDBMS, 12 | /// DDLs and DMLs to populate the tables. 
13 | pub populate_sql: String, 14 | } 15 | 16 | impl ExecutionBenchRunner { 17 | pub async fn new(populate_sql: String) -> Result { 18 | Ok(ExecutionBenchRunner { 19 | dbms: DatafusionDBMS::new().await?, 20 | populate_sql, 21 | }) 22 | } 23 | } 24 | 25 | /// With physical execution plan as input, 26 | /// measures the time it takes to execute the plan generated by the optimizer. 27 | impl PlannerBenchRunner for ExecutionBenchRunner { 28 | const BENCH_NAME: &str = "execution"; 29 | type BenchInput = Vec<(Arc, Arc)>; 30 | async fn setup( 31 | &mut self, 32 | test_case: &sqlplannertest::ParsedTestCase, 33 | ) -> Result<(Self::BenchInput, TestFlags)> { 34 | for sql in &test_case.before_sql { 35 | self.dbms.execute(sql, &TestFlags::default()).await?; 36 | } 37 | 38 | // Populate the existing tables. 39 | for sql in self.populate_sql.split(";\n") { 40 | self.dbms.execute(sql, &TestFlags::default()).await?; 41 | } 42 | 43 | let bench_task = test_case 44 | .tasks 45 | .iter() 46 | .find(|x| x.starts_with("bench")) 47 | .unwrap(); 48 | let flags = extract_flags(bench_task)?; 49 | 50 | self.dbms.setup(&flags).await?; 51 | let statements = self.dbms.parse_sql(&test_case.sql).await?; 52 | 53 | let mut physical_plans = Vec::new(); 54 | for statement in statements { 55 | physical_plans.push(self.dbms.create_physical_plan(statement, &flags).await?); 56 | } 57 | 58 | Ok((physical_plans, flags)) 59 | } 60 | async fn bench( 61 | self, 62 | input: Self::BenchInput, 63 | _test_case: &sqlplannertest::ParsedTestCase, 64 | _flags: &TestFlags, 65 | ) -> Result<()> { 66 | for (physical_plan, task_ctx) in input { 67 | self.dbms.execute_physical(physical_plan, task_ctx).await?; 68 | } 69 | Ok(()) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /optd-sqlplannertest/src/bench_helper/planning.rs: -------------------------------------------------------------------------------- 1 | use std::collections::VecDeque; 2 | 3 | use 
crate::{extract_flags, DatafusionDBMS, TestFlags}; 4 | use anyhow::Result; 5 | use datafusion::sql::parser::Statement; 6 | 7 | use super::PlannerBenchRunner; 8 | 9 | /// A benchmark runner for evaluating optimizer planning time. 10 | pub struct PlanningBenchRunner(DatafusionDBMS); 11 | 12 | impl PlanningBenchRunner { 13 | pub async fn new() -> Result { 14 | Ok(PlanningBenchRunner(DatafusionDBMS::new().await?)) 15 | } 16 | } 17 | 18 | /// With parsed statements as input, 19 | /// measures the time it takes to generate datafusion physical plans. 20 | impl PlannerBenchRunner for PlanningBenchRunner { 21 | const BENCH_NAME: &str = "planning"; 22 | type BenchInput = VecDeque; 23 | async fn setup( 24 | &mut self, 25 | test_case: &sqlplannertest::ParsedTestCase, 26 | ) -> Result<(Self::BenchInput, TestFlags)> { 27 | for sql in &test_case.before_sql { 28 | self.0.execute(sql, &TestFlags::default()).await?; 29 | } 30 | let bench_task = test_case 31 | .tasks 32 | .iter() 33 | .find(|x| x.starts_with("bench")) 34 | .unwrap(); 35 | let flags = extract_flags(bench_task)?; 36 | self.0.setup(&flags).await?; 37 | let statements = self.0.parse_sql(&test_case.sql).await?; 38 | 39 | Ok((statements, flags)) 40 | } 41 | async fn bench( 42 | self, 43 | input: Self::BenchInput, 44 | _test_case: &sqlplannertest::ParsedTestCase, 45 | flags: &TestFlags, 46 | ) -> Result<()> { 47 | for stmt in input { 48 | self.0.create_physical_plan(stmt, flags).await?; 49 | } 50 | Ok(()) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /optd-sqlplannertest/src/bin/planner_test_apply.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
5 | 6 | use std::path::Path; 7 | 8 | use anyhow::Result; 9 | use clap::Parser; 10 | use sqlplannertest::PlannerTestApplyOptions; 11 | 12 | #[derive(Parser)] 13 | #[command(version, about, long_about = None)] 14 | struct Cli { 15 | /// Optional list of test modules or test files to apply the test; if empty, apply all tests 16 | selections: Vec, 17 | /// Use the advanced cost model 18 | #[clap(long)] 19 | enable_advanced_cost_model: bool, 20 | /// Execute tests in serial 21 | #[clap(long)] 22 | serial: bool, 23 | } 24 | 25 | #[tokio::main] 26 | async fn main() -> Result<()> { 27 | use tracing_subscriber::{filter::LevelFilter, fmt, prelude::*, EnvFilter}; 28 | 29 | tracing_subscriber::registry() 30 | .with(fmt::layer()) 31 | .with( 32 | EnvFilter::builder() 33 | .with_default_directive(LevelFilter::INFO.into()) 34 | .from_env_lossy(), 35 | ) 36 | .init(); 37 | 38 | unsafe { backtrace_on_stack_overflow::enable() }; 39 | 40 | let cli = Cli::parse(); 41 | 42 | let enable_advanced_cost_model = cli.enable_advanced_cost_model; 43 | let opts = PlannerTestApplyOptions { 44 | serial: cli.serial, 45 | selections: cli.selections, 46 | }; 47 | 48 | sqlplannertest::planner_test_apply_with_options( 49 | Path::new(env!("CARGO_MANIFEST_DIR")).join("tests"), 50 | move || async move { 51 | if enable_advanced_cost_model { 52 | optd_sqlplannertest::DatafusionDBMS::new_advanced_cost().await 53 | } else { 54 | optd_sqlplannertest::DatafusionDBMS::new().await 55 | } 56 | }, 57 | opts, 58 | ) 59 | .await?; 60 | 61 | Ok(()) 62 | } 63 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/basic_nodes.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | 7 | /* 8 | 3 9 | 3 10 | 
*/ 11 | 12 | -- Test limit nodes 13 | select * from t1 limit 1; 14 | select * from t1 limit 3; 15 | select * from t1 limit 5; 16 | 17 | /* 18 | LogicalLimit { skip: 0(i64), fetch: 1(i64) } 19 | └── LogicalProjection { exprs: [ #0, #1 ] } 20 | └── LogicalScan { table: t1 } 21 | PhysicalLimit { skip: 0(i64), fetch: 1(i64) } 22 | └── PhysicalScan { table: t1 } 23 | 0 0 24 | 0 0 25 | 1 1 26 | 2 2 27 | 0 0 28 | 1 1 29 | 2 2 30 | */ 31 | 32 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/basic_nodes.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1 limit 1; 10 | select * from t1 limit 3; 11 | select * from t1 limit 5; 12 | desc: Test limit nodes 13 | tasks: 14 | - explain:logical_optd,physical_optd 15 | - execute -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/constant_predicate.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | insert into t1 values (0, 0), (1, 1), (2, 2); 4 | 5 | /* 6 | 3 7 | */ 8 | 9 | -- Test whether the optimizer handles integer equality predicates correctly. 10 | select * from t1 where t1v1 = 0; 11 | 12 | /* 13 | 0 0 14 | */ 15 | 16 | -- Test whether the optimizer handles multiple integer equality predicates correctly. 17 | select * from t1 where t1v1 = 0 and t1v2 = 1; 18 | 19 | /* 20 | 21 | */ 22 | 23 | -- Test whether the optimizer handles multiple integer inequality predicates correctly. 
24 | select * from t1 where t1v1 = 0 and t1v2 != 1; 25 | 26 | /* 27 | 0 0 28 | */ 29 | 30 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/constant_predicate.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | insert into t1 values (0, 0), (1, 1), (2, 2); 4 | tasks: 5 | - execute 6 | - sql: | 7 | select * from t1 where t1v1 = 0; 8 | desc: Test whether the optimizer handles integer equality predicates correctly. 9 | tasks: 10 | - execute 11 | - sql: | 12 | select * from t1 where t1v1 = 0 and t1v2 = 1; 13 | desc: Test whether the optimizer handles multiple integer equality predicates correctly. 14 | tasks: 15 | - execute 16 | - sql: | 17 | select * from t1 where t1v1 = 0 and t1v2 != 1; 18 | desc: Test whether the optimizer handles multiple integer inequality predicates correctly. 19 | tasks: 20 | - execute 21 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/cross_product.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int); 3 | create table t2(t2v1 int); 4 | insert into t1 values (0), (1), (2); 5 | insert into t2 values (0), (1), (2); 6 | 7 | /* 8 | 3 9 | 3 10 | */ 11 | 12 | -- Test optimizer logical for a cross product. 
13 | select * from t1, t2; 14 | 15 | /* 16 | LogicalProjection { exprs: [ #0, #1 ] } 17 | └── LogicalJoin { join_type: Inner, cond: true } 18 | ├── LogicalScan { table: t1 } 19 | └── LogicalScan { table: t2 } 20 | PhysicalNestedLoopJoin { join_type: Inner, cond: true } 21 | ├── PhysicalScan { table: t1 } 22 | └── PhysicalScan { table: t2 } 23 | 0 0 24 | 0 1 25 | 0 2 26 | 1 0 27 | 1 1 28 | 1 2 29 | 2 0 30 | 2 1 31 | 2 2 32 | */ 33 | 34 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/cross_product.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int); 3 | create table t2(t2v1 int); 4 | insert into t1 values (0), (1), (2); 5 | insert into t2 values (0), (1), (2); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1, t2; 10 | desc: Test optimizer logical for a cross product. 11 | tasks: 12 | - explain:logical_optd,physical_optd 13 | - execute 14 | 15 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_duplicated_expr.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(v1 int, v2 int); 3 | insert into t1 values (0, 0), (1, 1), (5, 2), (2, 4), (0, 2); 4 | 5 | /* 6 | 5 7 | */ 8 | 9 | -- Test without sorts/aggs. 10 | select * from t1; 11 | 12 | /* 13 | LogicalProjection { exprs: [ #0, #1 ] } 14 | └── LogicalScan { table: t1 } 15 | PhysicalScan { table: t1 } 16 | 0 0 17 | 1 1 18 | 5 2 19 | 2 4 20 | 0 2 21 | */ 22 | 23 | -- Test whether the optimizer handles duplicate sort expressions correctly. 
24 | select * from t1 order by v1, v2, v1 desc, v2 desc, v1 asc; 25 | 26 | /* 27 | LogicalSort 28 | ├── exprs: 29 | │ ┌── SortOrder { order: Asc } 30 | │ │ └── #0 31 | │ ├── SortOrder { order: Asc } 32 | │ │ └── #1 33 | │ ├── SortOrder { order: Desc } 34 | │ │ └── #0 35 | │ ├── SortOrder { order: Desc } 36 | │ │ └── #1 37 | │ └── SortOrder { order: Asc } 38 | │ └── #0 39 | └── LogicalProjection { exprs: [ #0, #1 ] } 40 | └── LogicalScan { table: t1 } 41 | PhysicalSort 42 | ├── exprs: 43 | │ ┌── SortOrder { order: Asc } 44 | │ │ └── #0 45 | │ └── SortOrder { order: Asc } 46 | │ └── #1 47 | └── PhysicalScan { table: t1 } 48 | 0 0 49 | 0 2 50 | 1 1 51 | 2 4 52 | 5 2 53 | */ 54 | 55 | -- Test whether the optimizer handles duplicate agg expressions correctly. 56 | select * from t1 group by v1, v2, v1; 57 | 58 | /* 59 | LogicalProjection { exprs: [ #0, #1 ] } 60 | └── LogicalAgg { exprs: [], groups: [ #0, #1, #0 ] } 61 | └── LogicalScan { table: t1 } 62 | PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } 63 | └── PhysicalScan { table: t1 } 64 | 0 0 65 | 1 1 66 | 5 2 67 | 2 4 68 | 0 2 69 | */ 70 | 71 | -- Test whether the optimizer handles duplicate sort and agg expressions correctly. 
72 | select * from t1 group by v1, v2, v1, v2, v2 order by v1, v2, v1 desc, v2 desc, v1 asc; 73 | 74 | /* 75 | LogicalSort 76 | ├── exprs: 77 | │ ┌── SortOrder { order: Asc } 78 | │ │ └── #0 79 | │ ├── SortOrder { order: Asc } 80 | │ │ └── #1 81 | │ ├── SortOrder { order: Desc } 82 | │ │ └── #0 83 | │ ├── SortOrder { order: Desc } 84 | │ │ └── #1 85 | │ └── SortOrder { order: Asc } 86 | │ └── #0 87 | └── LogicalProjection { exprs: [ #0, #1 ] } 88 | └── LogicalAgg { exprs: [], groups: [ #0, #1, #0, #1, #1 ] } 89 | └── LogicalScan { table: t1 } 90 | PhysicalSort 91 | ├── exprs: 92 | │ ┌── SortOrder { order: Asc } 93 | │ │ └── #0 94 | │ └── SortOrder { order: Asc } 95 | │ └── #1 96 | └── PhysicalAgg { aggrs: [], groups: [ #0, #1 ] } 97 | └── PhysicalScan { table: t1 } 98 | 0 0 99 | 0 2 100 | 1 1 101 | 2 4 102 | 5 2 103 | */ 104 | 105 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_duplicated_expr.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(v1 int, v2 int); 3 | insert into t1 values (0, 0), (1, 1), (5, 2), (2, 4), (0, 2); 4 | tasks: 5 | - execute 6 | - sql: | 7 | select * from t1; 8 | desc: Test without sorts/aggs. 9 | tasks: 10 | - explain:logical_optd,physical_optd 11 | - execute 12 | - sql: | 13 | select * from t1 order by v1, v2, v1 desc, v2 desc, v1 asc; 14 | desc: Test whether the optimizer handles duplicate sort expressions correctly. 15 | tasks: 16 | - explain:logical_optd,physical_optd 17 | - execute 18 | - sql: | 19 | select * from t1 group by v1, v2, v1; 20 | desc: Test whether the optimizer handles duplicate agg expressions correctly. 21 | tasks: 22 | - explain:logical_optd,physical_optd 23 | - execute 24 | - sql: | 25 | select * from t1 group by v1, v2, v1, v2, v2 order by v1, v2, v1 desc, v2 desc, v1 asc; 26 | desc: Test whether the optimizer handles duplicate sort and agg expressions correctly. 
27 | tasks: 28 | - explain:logical_optd,physical_optd 29 | - execute -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_limit.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | 7 | /* 8 | 3 9 | 3 10 | */ 11 | 12 | -- Test EliminateLimitRule (with 0 limit clause) 13 | select * from t1 LIMIT 0; 14 | 15 | /* 16 | LogicalLimit { skip: 0(i64), fetch: 0(i64) } 17 | └── LogicalProjection { exprs: [ #0, #1 ] } 18 | └── LogicalScan { table: t1 } 19 | PhysicalEmptyRelation { produce_one_row: false } 20 | */ 21 | 22 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_limit.yml: -------------------------------------------------------------------------------- 1 | 2 | - sql: | 3 | create table t1(t1v1 int, t1v2 int); 4 | create table t2(t2v1 int, t2v3 int); 5 | insert into t1 values (0, 0), (1, 1), (2, 2); 6 | insert into t2 values (0, 200), (1, 201), (2, 202); 7 | tasks: 8 | - execute 9 | - sql: | 10 | select * from t1 LIMIT 0; 11 | desc: Test EliminateLimitRule (with 0 limit clause) 12 | tasks: 13 | - explain:logical_optd,physical_optd 14 | - execute -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/eliminate_proj.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(v1 int, v2 int); 3 | insert into t1 values (0, 0), (1, 1), (2, 2); 4 | create table t2(v0 int, v1 int, v2 int, v3 int); 5 | insert into t2 values (0, 0, 0, 0), (1, 1, 1, 1), (2, 2, 2, 2); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select v1 from (select v2, v1 from (select v1, v2 from 
t1 limit 5)); 10 | desc: Test MergeProjectRule with only the rule enabled 11 | tasks: 12 | - explain[logical_rules:project_merge_rule]:logical_optd,physical_optd 13 | - sql: | 14 | select v1 from (select v2, v1 from (select v1, v2 from t1 limit 5)); 15 | desc: Test EliminateProjectRule with only the rule enabled 16 | tasks: 17 | - explain[logical_rules:eliminate_project_rule]:logical_optd,physical_optd 18 | - sql: | 19 | select v1 from (select v2, v1 from (select v1, v2 from t1 limit 5)); 20 | desc: Test with all rules enabled 21 | tasks: 22 | - explain:logical_optd,physical_optd 23 | - execute 24 | - sql: | 25 | select v1 from (select v2, v1 from (select v1, v2 from t1 limit 5)); 26 | desc: Test with all rules enabled 27 | tasks: 28 | - explain:logical_optd,physical_optd 29 | - execute 30 | - sql: | 31 | select v0, v2, v1, v3 from (select v0 as v0, v2 as v1, v1 as v2, v3 from t2); 32 | desc: Test with all rules enabled 33 | tasks: 34 | - explain:logical_optd,physical_optd 35 | - execute 36 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/empty_relation.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | 7 | /* 8 | 3 9 | 3 10 | */ 11 | 12 | -- Test whether the optimizer handles empty relation (select single value) correctly. 13 | select 64 + 1; 14 | 15 | /* 16 | LogicalProjection 17 | ├── exprs:Add 18 | │ ├── 64(i64) 19 | │ └── 1(i64) 20 | └── LogicalEmptyRelation { produce_one_row: true } 21 | PhysicalProjection 22 | ├── exprs:Add 23 | │ ├── 64(i64) 24 | │ └── 1(i64) 25 | └── PhysicalEmptyRelation { produce_one_row: true } 26 | 65 27 | */ 28 | 29 | -- Test whether the optimizer handles select constant from table correctly. 
30 | select 64 + 1 from t1; 31 | 32 | /* 33 | LogicalProjection 34 | ├── exprs:Add 35 | │ ├── 64(i64) 36 | │ └── 1(i64) 37 | └── LogicalScan { table: t1 } 38 | PhysicalProjection 39 | ├── exprs:Add 40 | │ ├── 64(i64) 41 | │ └── 1(i64) 42 | └── PhysicalScan { table: t1 } 43 | 65 44 | 65 45 | 65 46 | */ 47 | 48 | -- Test whether the optimizer eliminates join to empty relation 49 | select * from t1 inner join t2 on false; 50 | 51 | /* 52 | LogicalProjection { exprs: [ #0, #1, #2, #3 ] } 53 | └── LogicalJoin { join_type: Inner, cond: false } 54 | ├── LogicalScan { table: t1 } 55 | └── LogicalScan { table: t2 } 56 | PhysicalEmptyRelation { produce_one_row: false } 57 | */ 58 | 59 | -- Test whether the optimizer eliminates join to empty relation 60 | select 64+1 from t1 inner join t2 on false; 61 | 62 | /* 63 | LogicalProjection 64 | ├── exprs:Add 65 | │ ├── 64(i64) 66 | │ └── 1(i64) 67 | └── LogicalJoin { join_type: Inner, cond: false } 68 | ├── LogicalScan { table: t1 } 69 | └── LogicalScan { table: t2 } 70 | PhysicalProjection 71 | ├── exprs:Add 72 | │ ├── 64(i64) 73 | │ └── 1(i64) 74 | └── PhysicalEmptyRelation { produce_one_row: false } 75 | */ 76 | 77 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/empty_relation.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select 64 + 1; 10 | desc: Test whether the optimizer handles empty relation (select single value) correctly. 11 | tasks: 12 | - explain:logical_optd,physical_optd 13 | - execute 14 | - sql: | 15 | select 64 + 1 from t1; 16 | desc: Test whether the optimizer handles select constant from table correctly. 
17 | tasks: 18 | - explain:logical_optd,physical_optd 19 | - execute 20 | - sql: | 21 | select * from t1 inner join t2 on false; 22 | desc: Test whether the optimizer eliminates join to empty relation 23 | tasks: 24 | - explain:logical_optd,physical_optd 25 | - execute 26 | - sql: | 27 | select 64+1 from t1 inner join t2 on false; 28 | desc: Test whether the optimizer eliminates join to empty relation 29 | tasks: 30 | - explain:logical_optd,physical_optd 31 | - execute 32 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/verbose.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(v1 int); 3 | insert into t1 values (0), (1), (2), (3); 4 | 5 | /* 6 | 4 7 | */ 8 | 9 | -- Test non-verbose explain 10 | select * from t1; 11 | 12 | /* 13 | PhysicalScan { table: t1 } 14 | */ 15 | 16 | -- Test verbose explain 17 | select * from t1; 18 | 19 | /* 20 | PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } 21 | */ 22 | 23 | -- Test verbose explain with aggregation 24 | select count(*) from t1; 25 | 26 | /* 27 | PhysicalAgg 28 | ├── aggrs:Agg(Count) 29 | │ └── [ 1(i64) ] 30 | ├── groups: [] 31 | ├── cost: {compute=5000,io=1000} 32 | ├── stat: {row_cnt=1000} 33 | └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} } 34 | */ 35 | 36 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/basic/verbose.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(v1 int); 3 | insert into t1 values (0), (1), (2), (3); 4 | tasks: 5 | - execute 6 | - sql: | 7 | select * from t1; 8 | desc: Test non-verbose explain 9 | tasks: 10 | - explain:physical_optd 11 | - sql: | 12 | select * from t1; 13 | desc: Test verbose explain 14 | tasks: 15 | - explain[verbose]:physical_optd 16 | 
- sql: | 17 | select count(*) from t1; 18 | desc: Test verbose explain with aggregation 19 | tasks: 20 | - explain[verbose]:physical_optd 21 | 22 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/expressions/redundant_exprs.planner.sql: -------------------------------------------------------------------------------- 1 | -- Setup Test Table 2 | CREATE TABLE xxx (a INTEGER, b INTEGER); 3 | INSERT INTO xxx VALUES (0, 0), (1, 1), (2, 2); 4 | SELECT * FROM xxx WHERE a = 0; 5 | 6 | /* 7 | 3 8 | 0 0 9 | */ 10 | 11 | -- (no id or description) 12 | SELECT * FROM xxx WHERE a + 0 = b + 0; 13 | 14 | /* 15 | 0 0 16 | 1 1 17 | 2 2 18 | 19 | LogicalProjection { exprs: [ #0, #1 ] } 20 | └── LogicalFilter 21 | ├── cond:Eq 22 | │ ├── Add 23 | │ │ ├── Cast { cast_to: Int64, child: #0 } 24 | │ │ └── 0(i64) 25 | │ └── Add 26 | │ ├── Cast { cast_to: Int64, child: #1 } 27 | │ └── 0(i64) 28 | └── LogicalScan { table: xxx } 29 | PhysicalFilter 30 | ├── cond:Eq 31 | │ ├── Add 32 | │ │ ├── Cast { cast_to: Int64, child: #0 } 33 | │ │ └── 0(i64) 34 | │ └── Add 35 | │ ├── Cast { cast_to: Int64, child: #1 } 36 | │ └── 0(i64) 37 | └── PhysicalScan { table: xxx } 38 | */ 39 | 40 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/expressions/redundant_exprs.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | CREATE TABLE xxx (a INTEGER, b INTEGER); 3 | INSERT INTO xxx VALUES (0, 0), (1, 1), (2, 2); 4 | SELECT * FROM xxx WHERE a = 0; 5 | tasks: 6 | - execute 7 | desc: Setup Test Table 8 | - sql: | 9 | SELECT * FROM xxx WHERE a + 0 = b + 0; 10 | tasks: 11 | - execute 12 | - explain:logical_optd,physical_optd -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/joins/join_enumerate.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 
| create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | create table t3(t3v2 int, t3v4 int); 5 | insert into t1 values (0, 0), (1, 1), (2, 2); 6 | insert into t2 values (0, 200), (1, 201), (2, 202); 7 | insert into t3 values (0, 300), (1, 301), (2, 302); 8 | tasks: 9 | - execute 10 | - sql: | 11 | select * from t2, t1 where t1v1 = t2v1; 12 | desc: Test whether the optimizer enumerates all 2-join orders. 13 | tasks: 14 | - explain[disable_pruning]:logical_join_orders 15 | - explain:logical_join_orders 16 | - execute 17 | - sql: | 18 | select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2; 19 | desc: Test whether the optimizer enumerates all 3-join orders. (It should) 20 | tasks: 21 | - explain[disable_pruning]:logical_join_orders 22 | - explain:logical_join_orders 23 | - execute 24 | - sql: | 25 | select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2; 26 | desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently) 27 | tasks: 28 | - explain[disable_pruning]:logical_join_orders 29 | - explain:logical_join_orders 30 | - execute 31 | - sql: | 32 | select * from t1, (select * from t2, t3) where t1v1 = t2v1 and t1v2 = t3v2; 33 | desc: Test whether the optimizer enumerates all 3-join orders. 
(It don't currently) 34 | tasks: 35 | - explain[disable_pruning]:logical_join_orders,physical_optd 36 | - explain:logical_join_orders,physical_optd 37 | - execute 38 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/joins/multi-join.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(a int, b int); 3 | create table t2(c int, d int); 4 | create table t3(e int, f int); 5 | create table t4(g int, h int); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1, t2, t3 where a = c AND d = e; 10 | desc: test 3-way join 11 | tasks: 12 | - explain:logical_optd,physical_optd 13 | - sql: | 14 | select * from t1, t2, t3 where a = c AND b = e; 15 | desc: test 3-way join 16 | tasks: 17 | - explain:logical_optd,physical_optd 18 | - sql: | 19 | select * from t1, t2, t3, t4 where a = c AND b = e AND f = g; 20 | desc: test 4-way join 21 | tasks: 22 | - explain:logical_optd,physical_optd 23 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/joins/self-join.planner.sql: -------------------------------------------------------------------------------- 1 | -- (no id or description) 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | 7 | /* 8 | 3 9 | 3 10 | */ 11 | 12 | -- test self join 13 | select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1; 14 | 15 | /* 16 | (Join t1 t1) 17 | 18 | LogicalSort 19 | ├── exprs:SortOrder { order: Asc } 20 | │ └── #0 21 | └── LogicalProjection { exprs: [ #0, #1, #2, #3 ] } 22 | └── LogicalFilter 23 | ├── cond:Eq 24 | │ ├── #0 25 | │ └── #2 26 | └── LogicalJoin { join_type: Inner, cond: true } 27 | ├── LogicalScan { table: t1 } 28 | └── LogicalScan { table: t1 } 29 | PhysicalSort 30 | ├── exprs:SortOrder { order: Asc 
} 31 | │ └── #0 32 | └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] } 33 | ├── PhysicalScan { table: t1 } 34 | └── PhysicalScan { table: t1 } 35 | 0 0 0 0 36 | 1 1 1 1 37 | 2 2 2 2 38 | */ 39 | 40 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/joins/self-join.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1; 10 | desc: test self join 11 | tasks: 12 | - explain:logical_join_orders,logical_optd,physical_optd 13 | - execute 14 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/planner_test.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2023-2024 CMU Database Group 2 | // 3 | // Use of this source code is governed by an MIT-style license that can be found in the LICENSE file or at 4 | // https://opensource.org/licenses/MIT. 
use std::path::Path;

use anyhow::Result;

/// Test-harness entry point (harness = false): runs every planner test found
/// under this crate's `tests/` directory against a freshly constructed
/// `DatafusionDBMS`.
fn main() -> Result<()> {
    let tests_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests");
    sqlplannertest::planner_test_runner(tests_dir, || async {
        optd_sqlplannertest::DatafusionDBMS::new().await
    })?;
    Ok(())
}
-------------------------------------------------------------------------------- /optd-sqlplannertest/tests/pushdowns/fliter_transpose.planner.sql: --------------------------------------------------------------------------------
-- (no id or description)
create table t1(t1v1 int, t1v2 int);
create table t2(t2v1 int, t2v3 int);
insert into t1 values (0, 0), (1, 1), (2, 2);
insert into t2 values (0, 200), (1, 201), (2, 202);

/*
3
3
*/

-- Test whether we can transpose filter and projection
SELECT t1.t1v1, t1.t1v2, t2.t2v3
FROM t1, t2
WHERE t1.t1v1 = t2.t2v1;

/*
LogicalProjection { exprs: [ #0, #1, #3 ] }
└── LogicalFilter
    ├── cond:Eq
    │   ├── #0
    │   └── #2
    └── LogicalJoin { join_type: Inner, cond: true }
        ├── LogicalScan { table: t1 }
        └── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #1, #3 ] }
└── PhysicalFilter
    ├── cond:Eq
    │   ├── #0
    │   └── #2
    └── PhysicalNestedLoopJoin { join_type: Inner, cond: true }
        ├── PhysicalScan { table: t1 }
        └── PhysicalScan { table: t2 }
*/

-- Test whether we can transpose filter and projection
SELECT t1.t1v1, t1.t1v2, t2.t2v3
FROM t1, t2
WHERE t1.t1v1 = t2.t2v3;

/*
LogicalProjection { exprs: [ #0, #1, #3 ] }
└── LogicalFilter
    ├── cond:Eq
    │   ├── #0
    │   └── #3
    └── LogicalJoin { join_type: Inner, cond: true }
        ├── LogicalScan { table: t1 }
        └── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #1, #3 ] }
└── PhysicalFilter
    ├── cond:Eq
    │   ├── #0
    │   └── #3
    └── PhysicalNestedLoopJoin {
join_type: Inner, cond: true } 56 | ├── PhysicalScan { table: t1 } 57 | └── PhysicalScan { table: t2 } 58 | */ 59 | 60 | -- Test whether we can transpose filter and projection 61 | SELECT * FROM ( 62 | SELECT t1.t1v1, t1.t1v2, t2.t2v3 FROM t1, t2 63 | ) WHERE t1.t1v1 = t2.t2v3; 64 | 65 | /* 66 | LogicalProjection { exprs: [ #0, #1, #2 ] } 67 | └── LogicalFilter 68 | ├── cond:Eq 69 | │ ├── #0 70 | │ └── #2 71 | └── LogicalProjection { exprs: [ #0, #1, #3 ] } 72 | └── LogicalJoin { join_type: Inner, cond: true } 73 | ├── LogicalScan { table: t1 } 74 | └── LogicalScan { table: t2 } 75 | PhysicalProjection { exprs: [ #0, #1, #3 ] } 76 | └── PhysicalFilter 77 | ├── cond:Eq 78 | │ ├── #0 79 | │ └── #3 80 | └── PhysicalNestedLoopJoin { join_type: Inner, cond: true } 81 | ├── PhysicalScan { table: t1 } 82 | └── PhysicalScan { table: t2 } 83 | */ 84 | 85 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/pushdowns/fliter_transpose.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | SELECT t1.t1v1, t1.t1v2, t2.t2v3 10 | FROM t1, t2 11 | WHERE t1.t1v1 = t2.t2v1; 12 | desc: Test whether we can transpose filter and projection 13 | tasks: 14 | - explain[logical_rules:filter_project_transpose_rule+project_filter_transpose_rule+project_merge_rule,panic_on_budget]:logical_optd,physical_optd 15 | - sql: | 16 | SELECT t1.t1v1, t1.t1v2, t2.t2v3 17 | FROM t1, t2 18 | WHERE t1.t1v1 = t2.t2v3; 19 | desc: Test whether we can transpose filter and projection 20 | tasks: 21 | - explain[logical_rules:filter_project_transpose_rule+project_filter_transpose_rule+project_merge_rule,panic_on_budget]:logical_optd,physical_optd 22 | - sql: | 23 | SELECT * FROM ( 24 | 
SELECT t1.t1v1, t1.t1v2, t2.t2v3 FROM t1, t2 25 | ) WHERE t1.t1v1 = t2.t2v3; 26 | desc: Test whether we can transpose filter and projection 27 | tasks: 28 | - explain[logical_rules:filter_project_transpose_rule+project_filter_transpose_rule+project_merge_rule,panic_on_budget]:logical_optd,physical_optd 29 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/subqueries/subquery_unnesting.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | create table t3(t3v2 int, t3v4 int); 5 | tasks: 6 | - execute 7 | # - sql: | 8 | # select * from t1 where t1v1 in (select t2v1 from t2); 9 | # desc: Test whether the optimizer can unnest "in" subqueries. -- failing with unsupported expression 10 | # tasks: 11 | # - explain_logical 12 | - sql: | 13 | select * from t1 where (select sum(t2v3) from t2 where t2v1 = t1v1) > 100; 14 | desc: Test whether the optimizer can unnest correlated subqueries with (scalar op agg) 15 | tasks: 16 | - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 17 | - sql: | 18 | select * from t1 where (select sum(sumt2v3) from (select t2v1, sum(t2v3) as sumt2v3 from t2 where t2v1 = t1v1 group by t2v1)) > 100; 19 | desc: Test whether the optimizer can unnest correlated subqueries with (scalar op group agg) 20 | tasks: 21 | - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 22 | - sql: | 23 | select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1; 24 | desc: Test whether the optimizer can unnest correlated subqueries with scalar agg in select list 25 | tasks: 26 | - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 27 | # - sql: | 28 | # select * from t1 where exists (select * from t2 where t2v1 = t1v1); 29 | # desc: Test whether the optimizer can unnest correlated subqueries with exists 30 | # tasks: 31 | # 
- explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 32 | # todo: a test case on quantifier (any/all) 33 | - sql: | 34 | select * from t1 where (select sum(t2v3) from (select * from t2, t3 where t2v1 = t1v1 and t2v3 = t3v2)) > 100; 35 | desc: Test whether the optimizer can unnest correlated subqueries. 36 | tasks: 37 | - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd 38 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/bench_populate.sql: -------------------------------------------------------------------------------- 1 | -- A special version of DDL/DML for populating the TPC-H tables, sf=0.01 2 | 3 | CREATE EXTERNAL TABLE customer_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/customer.csv'; 4 | insert into customer select column_1, column_2, column_3, column_4, column_5, column_6, column_7, column_8 from customer_tbl; 5 | CREATE EXTERNAL TABLE lineitem_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/lineitem.csv'; 6 | insert into lineitem select column_1, column_2, column_3, column_4, column_5, column_6, column_7, column_8, column_9, column_10, column_11, column_12, column_13, column_14, column_15, column_16 from lineitem_tbl; 7 | CREATE EXTERNAL TABLE nation_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/nation.csv'; 8 | insert into nation select column_1, column_2, column_3, column_4 from nation_tbl; 9 | CREATE EXTERNAL TABLE orders_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/orders.csv'; 10 | insert into orders select column_1, column_2, column_3, column_4, column_5, column_6, column_7, column_8, column_9 from orders_tbl; 11 | CREATE EXTERNAL TABLE part_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION 
'../datafusion-optd-cli/tpch-sf0_01/part.csv'; 12 | insert into part select column_1, column_2, column_3, column_4, column_5, column_6, column_7, column_8, column_9 from part_tbl; 13 | CREATE EXTERNAL TABLE partsupp_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/partsupp.csv'; 14 | insert into partsupp select column_1, column_2, column_3, column_4, column_5 from partsupp_tbl; 15 | CREATE EXTERNAL TABLE region_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/region.csv'; 16 | insert into region select column_1, column_2, column_3 from region_tbl; 17 | CREATE EXTERNAL TABLE supplier_tbl STORED AS CSV OPTIONS (HAS_HEADER false, DELIMITER '|') LOCATION '../datafusion-optd-cli/tpch-sf0_01/supplier.csv'; 18 | insert into supplier select column_1, column_2, column_3, column_4, column_5, column_6, column_7 from supplier_tbl; 19 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q1.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | l_returnflag, 4 | l_linestatus, 5 | sum(l_quantity) as sum_qty, 6 | sum(l_extendedprice) as sum_base_price, 7 | sum(l_extendedprice * (1 - l_discount)) as sum_disc_price, 8 | sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge, 9 | avg(l_quantity) as avg_qty, 10 | avg(l_extendedprice) as avg_price, 11 | avg(l_discount) as avg_disc, 12 | count(*) as count_order 13 | FROM 14 | lineitem 15 | WHERE 16 | l_shipdate <= date '1998-12-01' - interval '90' day 17 | GROUP BY 18 | l_returnflag, l_linestatus 19 | ORDER BY 20 | l_returnflag, l_linestatus; 21 | desc: TPC-H Q1 22 | before: ["include_sql:schema.sql"] 23 | tasks: 24 | - explain:logical_optd,physical_optd 25 | - bench 26 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q10.yml: 
-------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | c_custkey, 4 | c_name, 5 | sum(l_extendedprice * (1 - l_discount)) as revenue, 6 | c_acctbal, 7 | n_name, 8 | c_address, 9 | c_phone, 10 | c_comment 11 | FROM 12 | customer, 13 | orders, 14 | lineitem, 15 | nation 16 | WHERE 17 | c_custkey = o_custkey 18 | AND l_orderkey = o_orderkey 19 | AND o_orderdate >= DATE '1993-07-01' 20 | AND o_orderdate < DATE '1993-07-01' + INTERVAL '3' MONTH 21 | AND l_returnflag = 'R' 22 | AND c_nationkey = n_nationkey 23 | GROUP BY 24 | c_custkey, 25 | c_name, 26 | c_acctbal, 27 | c_phone, 28 | n_name, 29 | c_address, 30 | c_comment 31 | ORDER BY 32 | revenue DESC 33 | LIMIT 20; 34 | desc: TPC-H Q10 35 | before: ["include_sql:schema.sql"] 36 | tasks: 37 | - explain:logical_optd,physical_optd 38 | - bench 39 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q11.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | ps_partkey, 4 | sum(ps_supplycost * ps_availqty) as value 5 | from 6 | partsupp, 7 | supplier, 8 | nation 9 | where 10 | ps_suppkey = s_suppkey 11 | and s_nationkey = n_nationkey 12 | and n_name = 'CHINA' 13 | group by 14 | ps_partkey having 15 | sum(ps_supplycost * ps_availqty) > ( 16 | select 17 | sum(ps_supplycost * ps_availqty) * 0.0001000000 18 | from 19 | partsupp, 20 | supplier, 21 | nation 22 | where 23 | ps_suppkey = s_suppkey 24 | and s_nationkey = n_nationkey 25 | and n_name = 'CHINA' 26 | ) 27 | order by 28 | value desc; 29 | desc: TPC-H Q11 30 | before: ["include_sql:schema.sql"] 31 | tasks: 32 | - explain:logical_optd,physical_optd 33 | - bench 34 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q12.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | 
l_shipmode, 4 | sum(case when o_orderpriority = '1-URGENT' 5 | or o_orderpriority = '2-HIGH' 6 | then 1 else 0 end) as high_priority_orders, 7 | sum(case when o_orderpriority <> '1-URGENT' 8 | and o_orderpriority <> '2-HIGH' 9 | then 1 else 0 end) as low_priority_orders 10 | FROM 11 | orders, 12 | lineitem 13 | WHERE 14 | o_orderkey = l_orderkey 15 | AND l_shipmode in ('MAIL', 'SHIP') 16 | AND l_commitdate < l_receiptdate 17 | AND l_shipdate < l_commitdate 18 | AND l_receiptdate >= DATE '1994-01-01' 19 | AND l_receiptdate < DATE '1995-01-01' 20 | GROUP BY 21 | l_shipmode 22 | ORDER BY 23 | l_shipmode; 24 | desc: TPC-H Q12 25 | before: ["include_sql:schema.sql"] 26 | tasks: 27 | - explain:logical_optd,physical_optd 28 | - bench 29 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q13.planner.sql: -------------------------------------------------------------------------------- 1 | -- TPC-H Q13 2 | select 3 | c_count, 4 | count(*) as custdist 5 | from 6 | ( 7 | select 8 | c_custkey, 9 | count(o_orderkey) 10 | from 11 | customer left outer join orders on 12 | c_custkey = o_custkey 13 | and o_comment not like '%special%requests%' 14 | group by 15 | c_custkey 16 | ) as c_orders (c_custkey, c_count) 17 | group by 18 | c_count 19 | order by 20 | custdist desc, 21 | c_count desc; 22 | 23 | /* 24 | LogicalSort 25 | ├── exprs: 26 | │ ┌── SortOrder { order: Desc } 27 | │ │ └── #1 28 | │ └── SortOrder { order: Desc } 29 | │ └── #0 30 | └── LogicalProjection { exprs: [ #0, #1 ] } 31 | └── LogicalAgg 32 | ├── exprs:Agg(Count) 33 | │ └── [ 1(i64) ] 34 | ├── groups: [ #1 ] 35 | └── LogicalProjection { exprs: [ #0, #1 ] } 36 | └── LogicalProjection { exprs: [ #0, #1 ] } 37 | └── LogicalAgg 38 | ├── exprs:Agg(Count) 39 | │ └── [ #8 ] 40 | ├── groups: [ #0 ] 41 | └── LogicalJoin 42 | ├── join_type: LeftOuter 43 | ├── cond:And 44 | │ ├── Eq 45 | │ │ ├── #0 46 | │ │ └── #9 47 | │ └── Like { expr: #16, pattern: 
"%special%requests%", negated: true, case_insensitive: false } 48 | ├── LogicalScan { table: customer } 49 | └── LogicalScan { table: orders } 50 | PhysicalSort 51 | ├── exprs: 52 | │ ┌── SortOrder { order: Desc } 53 | │ │ └── #1 54 | │ └── SortOrder { order: Desc } 55 | │ └── #0 56 | └── PhysicalAgg 57 | ├── aggrs:Agg(Count) 58 | │ └── [ 1(i64) ] 59 | ├── groups: [ #1 ] 60 | └── PhysicalAgg 61 | ├── aggrs:Agg(Count) 62 | │ └── [ #8 ] 63 | ├── groups: [ #0 ] 64 | └── PhysicalNestedLoopJoin 65 | ├── join_type: LeftOuter 66 | ├── cond:And 67 | │ ├── Eq 68 | │ │ ├── #0 69 | │ │ └── #9 70 | │ └── Like { expr: #16, pattern: "%special%requests%", negated: true, case_insensitive: false } 71 | ├── PhysicalScan { table: customer } 72 | └── PhysicalScan { table: orders } 73 | */ 74 | 75 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q13.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | c_count, 4 | count(*) as custdist 5 | from 6 | ( 7 | select 8 | c_custkey, 9 | count(o_orderkey) 10 | from 11 | customer left outer join orders on 12 | c_custkey = o_custkey 13 | and o_comment not like '%special%requests%' 14 | group by 15 | c_custkey 16 | ) as c_orders (c_custkey, c_count) 17 | group by 18 | c_count 19 | order by 20 | custdist desc, 21 | c_count desc; 22 | desc: TPC-H Q13 23 | before: ["include_sql:schema.sql"] 24 | tasks: 25 | - explain:logical_optd,physical_optd 26 | - bench 27 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q14.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | 100.00 * sum(case when p_type like 'PROMO%' 4 | then l_extendedprice * (1 - l_discount) 5 | else 0 end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 6 | FROM 7 | lineitem, 8 | part 9 | WHERE 10 | l_partkey = p_partkey 11 | AND 
l_shipdate >= DATE '1995-09-01' 12 | AND l_shipdate < DATE '1995-09-01' + INTERVAL '1' MONTH; 13 | desc: TPC-H Q14 14 | before: ["include_sql:schema.sql"] 15 | tasks: 16 | - explain:logical_optd,physical_optd 17 | - bench 18 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q15.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | WITH revenue0 (supplier_no, total_revenue) AS 3 | ( 4 | SELECT 5 | l_suppkey, 6 | SUM(l_extendedprice * (1 - l_discount)) 7 | FROM 8 | lineitem 9 | WHERE 10 | l_shipdate >= DATE '1993-01-01' 11 | AND l_shipdate < DATE '1993-01-01' + INTERVAL '3' MONTH 12 | GROUP BY 13 | l_suppkey 14 | ) 15 | SELECT 16 | s_suppkey, 17 | s_name, 18 | s_address, 19 | s_phone, 20 | total_revenue 21 | FROM 22 | supplier, 23 | revenue0 24 | WHERE 25 | s_suppkey = supplier_no 26 | AND total_revenue = 27 | ( 28 | SELECT 29 | MAX(total_revenue) 30 | FROM 31 | revenue0 32 | ) 33 | ORDER BY 34 | s_suppkey; 35 | desc: TPC-H Q15 36 | before: ["include_sql:schema.sql"] 37 | tasks: 38 | - explain:logical_optd,physical_optd 39 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q16.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | p_brand, 4 | p_type, 5 | p_size, 6 | count(distinct ps_suppkey) as supplier_cnt 7 | from 8 | partsupp, 9 | part 10 | where 11 | p_partkey = ps_partkey 12 | and p_brand <> 'Brand#45' 13 | and p_type not like 'MEDIUM POLISHED%' 14 | and p_size in (49, 14, 23, 45, 19, 3, 36, 9) 15 | and ps_suppkey not in ( 16 | select 17 | s_suppkey 18 | from 19 | supplier 20 | where 21 | s_comment like '%Customer%Complaints%' 22 | ) 23 | group by 24 | p_brand, 25 | p_type, 26 | p_size 27 | order by 28 | supplier_cnt desc, 29 | p_brand, 30 | p_type, 31 | p_size; 32 | desc: TPC-H Q16 33 | before: ["include_sql:schema.sql"] 34 | 
tasks: 35 | - explain:logical_optd,physical_optd 36 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q17.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | ROUND(SUM(l_extendedprice) / 7.0, 16) AS avg_yearly 4 | FROM 5 | lineitem, 6 | part 7 | WHERE 8 | p_partkey = l_partkey 9 | AND p_brand = 'Brand#13' 10 | AND p_container = 'JUMBO PKG' 11 | AND l_quantity < ( 12 | SELECT 13 | 0.2 * AVG(l_quantity) 14 | FROM 15 | lineitem 16 | WHERE 17 | l_partkey = p_partkey 18 | ); 19 | desc: TPC-H Q17 20 | before: ["include_sql:schema.sql"] 21 | tasks: 22 | - explain:logical_optd,physical_optd 23 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q18.yml.disabled: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | c_name, 4 | c_custkey, 5 | o_orderkey, 6 | o_orderdate, 7 | o_totalprice, 8 | sum(l_quantity) 9 | from 10 | customer, 11 | orders, 12 | lineitem 13 | where 14 | o_orderkey in ( 15 | select 16 | l_orderkey 17 | from 18 | lineitem 19 | group by 20 | l_orderkey having 21 | sum(l_quantity) > 250 -- original: 300 22 | ) 23 | and c_custkey = o_custkey 24 | and o_orderkey = l_orderkey 25 | group by 26 | c_name, 27 | c_custkey, 28 | o_orderkey, 29 | o_orderdate, 30 | o_totalprice 31 | order by 32 | o_totalprice desc, 33 | o_orderdate 34 | limit 100; 35 | desc: TPC-H Q18 36 | before: ["include_sql:schema.sql"] 37 | tasks: 38 | - explain:logical_optd,physical_optd 39 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q19.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | sum(l_extendedprice* (1 - l_discount)) as revenue 4 | FROM 5 | lineitem, 6 | part 7 | WHERE 8 | ( 9 | p_partkey = l_partkey 10 | AND 
p_brand = 'Brand#12' 11 | AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 12 | AND l_quantity >= 1 AND l_quantity <= 11 13 | AND p_size BETWEEN 1 AND 5 14 | AND l_shipmode IN ('AIR', 'AIR REG') 15 | AND l_shipinstruct = 'DELIVER IN PERSON' 16 | ) OR ( 17 | p_partkey = l_partkey 18 | AND p_brand = 'Brand#23' 19 | AND p_container IN ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK') 20 | AND l_quantity >= 10 AND l_quantity <= 20 21 | AND p_size BETWEEN 1 AND 10 22 | AND l_shipmode IN ('AIR', 'AIR REG') 23 | AND l_shipinstruct = 'DELIVER IN PERSON' 24 | ) OR ( 25 | p_partkey = l_partkey 26 | AND p_brand = 'Brand#34' 27 | AND p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') 28 | AND l_quantity >= 20 AND l_quantity <= 30 29 | AND p_size BETWEEN 1 AND 15 30 | AND l_shipmode IN ('AIR', 'AIR REG') 31 | AND l_shipinstruct = 'DELIVER IN PERSON' 32 | ) 33 | desc: TPC-H Q19 34 | before: ["include_sql:schema.sql"] 35 | tasks: 36 | - explain:logical_optd,physical_optd 37 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q2.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | s_acctbal, 4 | s_name, 5 | n_name, 6 | p_partkey, 7 | p_mfgr, 8 | s_address, 9 | s_phone, 10 | s_comment 11 | from 12 | part, 13 | supplier, 14 | partsupp, 15 | nation, 16 | region 17 | where 18 | p_partkey = ps_partkey 19 | and s_suppkey = ps_suppkey 20 | and p_size = 4 21 | and p_type like '%TIN' 22 | and s_nationkey = n_nationkey 23 | and n_regionkey = r_regionkey 24 | and r_name = 'AFRICA' 25 | and ps_supplycost = ( 26 | select 27 | min(ps_supplycost) 28 | from 29 | partsupp, 30 | supplier, 31 | nation, 32 | region 33 | where 34 | p_partkey = ps_partkey 35 | and s_suppkey = ps_suppkey 36 | and s_nationkey = n_nationkey 37 | and n_regionkey = r_regionkey 38 | and r_name = 'AFRICA' 39 | ) 40 | order by 41 | s_acctbal desc, 42 | n_name, 43 | s_name, 44 | p_partkey 
45 | limit 100; 46 | desc: TPC-H Q2 47 | before: ["include_sql:schema.sql"] 48 | tasks: 49 | - explain:logical_optd,optimized_logical_optd,physical_optd 50 | 51 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q20.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | s_name, 4 | s_address 5 | from 6 | supplier, 7 | nation 8 | where 9 | s_suppkey in ( 10 | select 11 | ps_suppkey 12 | from 13 | partsupp 14 | where 15 | ps_partkey in ( 16 | select 17 | p_partkey 18 | from 19 | part 20 | where 21 | p_name like 'indian%' 22 | ) 23 | and ps_availqty > ( 24 | select 25 | 0.5 * sum(l_quantity) 26 | from 27 | lineitem 28 | where 29 | l_partkey = ps_partkey 30 | and l_suppkey = ps_suppkey 31 | and l_shipdate >= date '1996-01-01' 32 | and l_shipdate < date '1996-01-01' + interval '1' year 33 | ) 34 | ) 35 | and s_nationkey = n_nationkey 36 | and n_name = 'IRAQ' 37 | order by 38 | s_name; 39 | desc: TPC-H Q20 40 | before: ["include_sql:schema.sql"] 41 | tasks: 42 | - explain:logical_optd,physical_optd 43 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q21.yml.disabled: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | s_name, 4 | count(*) as numwait 5 | from 6 | supplier, 7 | lineitem l1, 8 | orders, 9 | nation 10 | where 11 | s_suppkey = l1.l_suppkey 12 | and o_orderkey = l1.l_orderkey 13 | and o_orderstatus = 'F' 14 | and l1.l_receiptdate > l1.l_commitdate 15 | and exists ( 16 | select 17 | * 18 | from 19 | lineitem l2 20 | where 21 | l2.l_orderkey = l1.l_orderkey 22 | and l2.l_suppkey <> l1.l_suppkey 23 | ) 24 | and not exists ( 25 | select 26 | * 27 | from 28 | lineitem l3 29 | where 30 | l3.l_orderkey = l1.l_orderkey 31 | and l3.l_suppkey <> l1.l_suppkey 32 | and l3.l_receiptdate > l3.l_commitdate 33 | ) 34 | and s_nationkey 
= n_nationkey 35 | and n_name = 'SAUDI ARABIA' 36 | group by 37 | s_name 38 | order by 39 | numwait desc, 40 | s_name 41 | limit 100; 42 | desc: TPC-H Q21 43 | before: ["include_sql:schema.sql"] 44 | tasks: 45 | - explain:logical_optd,physical_optd 46 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q22.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | cntrycode, 4 | count(*) as numcust, 5 | sum(c_acctbal) as totacctbal 6 | from 7 | ( 8 | select 9 | substring(c_phone from 1 for 2) as cntrycode, 10 | c_acctbal 11 | from 12 | customer 13 | where 14 | substring(c_phone from 1 for 2) in 15 | ('13', '31', '23', '29', '30', '18', '17') 16 | and c_acctbal > ( 17 | select 18 | avg(c_acctbal) 19 | from 20 | customer 21 | where 22 | c_acctbal > 0.00 23 | and substring(c_phone from 1 for 2) in 24 | ('13', '31', '23', '29', '30', '18', '17') 25 | ) 26 | and not exists ( 27 | select 28 | * 29 | from 30 | orders 31 | where 32 | o_custkey = c_custkey 33 | ) 34 | ) as custsale 35 | group by 36 | cntrycode 37 | order by 38 | cntrycode; 39 | desc: TPC-H Q22 40 | before: ["include_sql:schema.sql"] 41 | tasks: 42 | - explain:logical_optd,physical_optd 43 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q3.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | l_orderkey, 4 | SUM(l_extendedprice * (1 - l_discount)) AS revenue, 5 | o_orderdate, 6 | o_shippriority 7 | FROM 8 | customer, 9 | orders, 10 | lineitem 11 | WHERE 12 | c_mktsegment = 'FURNITURE' 13 | AND c_custkey = o_custkey 14 | AND l_orderkey = o_orderkey 15 | AND o_orderdate < DATE '1995-03-29' 16 | AND l_shipdate > DATE '1995-03-29' 17 | GROUP BY 18 | l_orderkey, 19 | o_orderdate, 20 | o_shippriority 21 | ORDER BY 22 | revenue DESC, 23 | o_orderdate LIMIT 10; 24 | desc: 
TPC-H Q3 25 | before: ["include_sql:schema.sql"] 26 | tasks: 27 | - explain:logical_optd,physical_optd 28 | - bench 29 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q4.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | o_orderpriority, 4 | count(*) as order_count 5 | from 6 | orders 7 | where 8 | o_orderdate >= date '1993-07-01' 9 | and o_orderdate < date '1993-07-01' + interval '3' month 10 | and exists ( 11 | select 12 | * 13 | from 14 | lineitem 15 | where 16 | l_orderkey = o_orderkey 17 | and l_commitdate < l_receiptdate 18 | ) 19 | group by 20 | o_orderpriority 21 | order by 22 | o_orderpriority; 23 | desc: TPC-H Q4 24 | before: ["include_sql:schema.sql"] 25 | tasks: 26 | - explain:logical_optd,physical_optd 27 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q5.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | n_name AS nation, 4 | SUM(l_extendedprice * (1 - l_discount)) AS revenue 5 | FROM 6 | customer, 7 | orders, 8 | lineitem, 9 | supplier, 10 | nation, 11 | region 12 | WHERE 13 | c_custkey = o_custkey 14 | AND l_orderkey = o_orderkey 15 | AND l_suppkey = s_suppkey 16 | AND c_nationkey = s_nationkey 17 | AND s_nationkey = n_nationkey 18 | AND n_regionkey = r_regionkey 19 | AND r_name = 'Asia' -- Specified region 20 | AND o_orderdate >= DATE '2023-01-01' 21 | AND o_orderdate < DATE '2024-01-01' 22 | GROUP BY 23 | n_name 24 | ORDER BY 25 | revenue DESC; 26 | desc: TPC-H Q5 27 | before: ["include_sql:schema.sql"] 28 | tasks: 29 | - explain:logical_optd,physical_optd 30 | - bench 31 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q6.planner.sql: -------------------------------------------------------------------------------- 1 | -- TPC-H 
Q6 2 | SELECT 3 | SUM(l_extendedprice * l_discount) AS revenue_loss 4 | FROM 5 | lineitem 6 | WHERE 7 | l_shipdate >= DATE '2023-01-01' 8 | AND l_shipdate < DATE '2024-01-01' 9 | AND l_discount BETWEEN 0.05 AND 0.07 10 | AND l_quantity < 24; 11 | 12 | /* 13 | LogicalProjection { exprs: [ #0 ] } 14 | └── LogicalAgg 15 | ├── exprs:Agg(Sum) 16 | │ └── Mul 17 | │ ├── #5 18 | │ └── #6 19 | ├── groups: [] 20 | └── LogicalFilter 21 | ├── cond:And 22 | │ ├── Geq 23 | │ │ ├── #10 24 | │ │ └── Cast { cast_to: Date32, child: "2023-01-01" } 25 | │ ├── Lt 26 | │ │ ├── #10 27 | │ │ └── Cast { cast_to: Date32, child: "2024-01-01" } 28 | │ ├── Between { child: Cast { cast_to: Decimal128(30, 15), child: #6 }, lower: Cast { cast_to: Decimal128(30, 15), child: 0.05(float) }, upper: Cast { cast_to: Decimal128(30, 15), child: 0.07(float) } } 29 | │ └── Lt 30 | │ ├── Cast { cast_to: Decimal128(22, 2), child: #4 } 31 | │ └── Cast { cast_to: Decimal128(22, 2), child: 24(i64) } 32 | └── LogicalScan { table: lineitem } 33 | PhysicalAgg 34 | ├── aggrs:Agg(Sum) 35 | │ └── Mul 36 | │ ├── #5 37 | │ └── #6 38 | ├── groups: [] 39 | └── PhysicalFilter 40 | ├── cond:And 41 | │ ├── Geq 42 | │ │ ├── #10 43 | │ │ └── Cast { cast_to: Date32, child: "2023-01-01" } 44 | │ ├── Lt 45 | │ │ ├── #10 46 | │ │ └── Cast { cast_to: Date32, child: "2024-01-01" } 47 | │ ├── Between { child: Cast { cast_to: Decimal128(30, 15), child: #6 }, lower: Cast { cast_to: Decimal128(30, 15), child: 0.05(float) }, upper: Cast { cast_to: Decimal128(30, 15), child: 0.07(float) } } 48 | │ └── Lt 49 | │ ├── Cast { cast_to: Decimal128(22, 2), child: #4 } 50 | │ └── Cast { cast_to: Decimal128(22, 2), child: 24(i64) } 51 | └── PhysicalScan { table: lineitem } 52 | */ 53 | 54 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q6.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | SUM(l_extendedprice * 
l_discount) AS revenue_loss 4 | FROM 5 | lineitem 6 | WHERE 7 | l_shipdate >= DATE '2023-01-01' 8 | AND l_shipdate < DATE '2024-01-01' 9 | AND l_discount BETWEEN 0.05 AND 0.07 10 | AND l_quantity < 24; 11 | desc: TPC-H Q6 12 | before: ["include_sql:schema.sql"] 13 | tasks: 14 | - explain:logical_optd,physical_optd 15 | - bench 16 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q7.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | supp_nation, 4 | cust_nation, 5 | l_year, 6 | SUM(volume) AS revenue 7 | FROM 8 | ( 9 | SELECT 10 | n1.n_name AS supp_nation, 11 | n2.n_name AS cust_nation, 12 | EXTRACT(YEAR FROM l_shipdate) AS l_year, 13 | l_extendedprice * (1 - l_discount) AS volume 14 | FROM 15 | supplier, 16 | lineitem, 17 | orders, 18 | customer, 19 | nation n1, 20 | nation n2 21 | WHERE 22 | s_suppkey = l_suppkey 23 | AND o_orderkey = l_orderkey 24 | AND c_custkey = o_custkey 25 | AND s_nationkey = n1.n_nationkey 26 | AND c_nationkey = n2.n_nationkey 27 | AND ( 28 | (n1.n_name = 'FRANCE' AND n2.n_name = 'GERMANY') 29 | OR (n1.n_name = 'GERMANY' AND n2.n_name = 'FRANCE') 30 | ) 31 | AND l_shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31' 32 | ) AS shipping 33 | GROUP BY 34 | supp_nation, 35 | cust_nation, 36 | l_year 37 | ORDER BY 38 | supp_nation, 39 | cust_nation, 40 | l_year; 41 | desc: TPC-H Q7 42 | before: ["include_sql:schema.sql"] 43 | tasks: 44 | - explain:logical_optd,physical_optd 45 | - bench 46 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q8.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | select 3 | o_year, 4 | sum(case 5 | when nation = 'IRAQ' then volume 6 | else 0 7 | end) / sum(volume) as mkt_share 8 | from 9 | ( 10 | select 11 | extract(year from o_orderdate) as o_year, 12 | l_extendedprice * 
(1 - l_discount) as volume, 13 | n2.n_name as nation 14 | from 15 | part, 16 | supplier, 17 | lineitem, 18 | orders, 19 | customer, 20 | nation n1, 21 | nation n2, 22 | region 23 | where 24 | p_partkey = l_partkey 25 | and s_suppkey = l_suppkey 26 | and l_orderkey = o_orderkey 27 | and o_custkey = c_custkey 28 | and c_nationkey = n1.n_nationkey 29 | and n1.n_regionkey = r_regionkey 30 | and r_name = 'AMERICA' 31 | and s_nationkey = n2.n_nationkey 32 | and o_orderdate between date '1995-01-01' and date '1996-12-31' 33 | and p_type = 'ECONOMY ANODIZED STEEL' 34 | ) as all_nations 35 | group by 36 | o_year 37 | order by 38 | o_year; 39 | desc: TPC-H Q8 without top-most limit node 40 | before: ["include_sql:schema.sql"] 41 | tasks: 42 | - explain:logical_optd,physical_optd 43 | - bench 44 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/tpch/q9.yml: -------------------------------------------------------------------------------- 1 | - sql: | 2 | SELECT 3 | nation, 4 | o_year, 5 | SUM(amount) AS sum_profit 6 | FROM 7 | ( 8 | SELECT 9 | n_name AS nation, 10 | EXTRACT(YEAR FROM o_orderdate) AS o_year, 11 | l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount 12 | FROM 13 | part, 14 | supplier, 15 | lineitem, 16 | partsupp, 17 | orders, 18 | nation 19 | WHERE 20 | s_suppkey = l_suppkey 21 | AND ps_suppkey = l_suppkey 22 | AND ps_partkey = l_partkey 23 | AND p_partkey = l_partkey 24 | AND o_orderkey = l_orderkey 25 | AND s_nationkey = n_nationkey 26 | AND p_name LIKE '%green%' 27 | ) AS profit 28 | GROUP BY 29 | nation, 30 | o_year 31 | ORDER BY 32 | nation, 33 | o_year DESC; 34 | desc: TPC-H Q9 35 | before: ["include_sql:schema.sql"] 36 | tasks: 37 | - explain:logical_optd,physical_optd 38 | - bench 39 | -------------------------------------------------------------------------------- /optd-sqlplannertest/tests/utils/memo_dump.yml: 
-------------------------------------------------------------------------------- 1 | - sql: | 2 | create table t1(t1v1 int, t1v2 int); 3 | create table t2(t2v1 int, t2v3 int); 4 | insert into t1 values (0, 0), (1, 1), (2, 2); 5 | insert into t2 values (0, 200), (1, 201), (2, 202); 6 | tasks: 7 | - execute 8 | - sql: | 9 | select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1; 10 | desc: test self join 11 | tasks: 12 | - explain[dump_memo_table,enable_tracing]:physical_optd 13 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | stable 2 | -------------------------------------------------------------------------------- /tpch_diff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Directories holding the generated TPC-H queries to compare (sf1 vs sf0.01) 4 | dir1="optd_perfbench_workspace/tpch/genned_queries/dbPOSTGRESQL_sf1_sd15721" 5 | dir2="optd_perfbench_workspace/tpch/genned_queries/dbPOSTGRESQL_sf0.01_sd15721" 6 | 7 | # Loop through the 22 TPC-H query files 8 | for i in {1..22}; do 9 | file1="${dir1}/${i}.sql" 10 | file2="${dir2}/${i}.sql" 11 | 12 | # Check if both files exist 13 | if [[ -f "$file1" && -f "$file2" ]]; then 14 | # Compare via diff's exit status (-q/--brief): non-zero means the files differ, so the full diff output never has to be captured 15 | if ! diff -q "$file1" "$file2" > /dev/null; then 16 | echo "Difference found in file ${i}.sql" 17 | else 18 | echo "No differences in file ${i}.sql" 19 | fi 20 | else 21 | echo "File ${i}.sql does not exist in one of the directories." 22 | fi 23 | done 24 | --------------------------------------------------------------------------------