├── .gitignore ├── .images └── query-plan1.png ├── src ├── lib.rs ├── visitor_utils.rs ├── data_type.rs ├── query_graph │ ├── optimizer │ │ └── rules │ │ │ ├── project_merge.rs │ │ │ ├── top_projection.rs │ │ │ ├── cte_discovery.rs │ │ │ ├── union_merge.rs │ │ │ ├── aggregate_remove.rs │ │ │ ├── filter_project_transpose.rs │ │ │ ├── project_normalization.rs │ │ │ ├── identity_join.rs │ │ │ ├── mod.rs │ │ │ ├── union_pruning.rs │ │ │ ├── filter_aggregate_transpose.rs │ │ │ ├── prune_aggregate_input.rs │ │ │ ├── aggregate_pruning.rs │ │ │ ├── expression_reduction.rs │ │ │ ├── filter_merge.rs │ │ │ ├── filter_normalization.rs │ │ │ ├── filter_apply_transpose.rs │ │ │ ├── remove_passthrough_project.rs │ │ │ ├── filter_join_transpose.rs │ │ │ ├── apply_pruning.rs │ │ │ ├── aggregate_project_transpose.rs │ │ │ ├── aggregate_simplifier.rs │ │ │ ├── join_pruning.rs │ │ │ ├── join_project_transpose.rs │ │ │ └── common_aggregate_discovery.rs │ ├── properties │ │ ├── equivalence_classes.rs │ │ ├── input_dependencies.rs │ │ ├── mod.rs │ │ ├── subqueries.rs │ │ ├── num_columns.rs │ │ ├── row_type.rs │ │ └── correlated_input_refs.rs │ ├── cloner.rs │ ├── json.rs │ └── explain.rs ├── bin │ └── dag.rs ├── value.rs └── scalar_expr │ ├── reduction.rs │ └── equivalence_class.rs ├── .github └── workflows │ └── rust.yml ├── Cargo.toml ├── README.md ├── tools ├── vis.html ├── cytoscape.html └── d3.html ├── tests └── testdata │ └── explain │ ├── project_normalization.test │ ├── filter_project_transpose.test │ ├── cte_discovery.test │ ├── keys_filter.test │ ├── union_pruning.test │ ├── union_merge.test │ ├── aggregate_project_transpose.test │ └── expression_reduction.test └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /.images/query-plan1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asenac/rust-sql-playground/HEAD/.images/query-plan1.png -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate lazy_static; 3 | 4 | #[macro_use] 5 | extern crate serde_derive; 6 | 7 | pub mod data_type; 8 | pub mod query_graph; 9 | pub mod scalar_expr; 10 | pub mod value; 11 | pub mod visitor_utils; 12 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-sql" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [lib] 9 | name = "rust_sql" 10 | path = "src/lib.rs" 11 | 12 | [[bin]] 13 | name = "dag" 14 | 15 | [dependencies] 16 | datadriven = "0.7.0" 
17 | itertools = "0.11.0"
18 | lazy_static = "1.4.0"
19 | serde = "1.0.166"
20 | serde_derive = "1.0.166"
21 | serde_json = "1.0.99"
22 | 
--------------------------------------------------------------------------------
/src/visitor_utils.rs:
--------------------------------------------------------------------------------
 1 | pub enum PreOrderVisitationResult {
 2 |     VisitInputs,
 3 |     DoNotVisitInputs,
 4 |     Abort,
 5 | }
 6 | 
 7 | pub enum PostOrderVisitationResult {
 8 |     Continue,
 9 |     Abort,
10 | }
11 | 
12 | pub struct VisitationStep<V> {
13 |     pub node: V,
14 |     pub next_child: Option<usize>,
15 | }
16 | 
17 | impl<V> VisitationStep<V> {
18 |     pub fn new(node: V) -> Self {
19 |         Self {
20 |             node,
21 |             next_child: None,
22 |         }
23 |     }
24 | }
25 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SQL query compiler playground
 2 | 
 3 | This is a SQL query compiler written in Rust mainly for learning and blogging purposes.
 4 | 
 5 | There is no SQL parser yet and its overall functionality is very limited, although the
 6 | logical optimizer is getting real.
 7 | 
 8 | ## Blog posts
 9 | 
10 | * [Part one, the query plan representation](https://andres.senac.es/posts/query-compiler-part-one/)
11 | * [Part two, the query rewrite driver](https://andres.senac.es/posts/query-compiler-part-two-rule-driver/)
12 | 
13 | ## Visualizing query plans
14 | 
15 | The `JsonSerializer` utility can be used to dump the query plan in a JSON format that can be
16 | rendered with any of the utilities in the `tools` folder, each using a different graph rendering
17 | library.
18 | 
19 | ![Query plan][query-plan-1]
20 | 
21 | [query-plan-1]: .images/query-plan1.png
--------------------------------------------------------------------------------
/src/data_type.rs:
--------------------------------------------------------------------------------
 1 | use core::fmt;
 2 | 
 3 | #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash, Debug)]
 4 | pub enum DataType {
 5 |     Bool,
 6 |     Int,
 7 |     BigInt,
 8 |     String,
 9 |     Unknown,
10 |     Any,
11 |     Array(Box<DataType>),
12 |     Tuple(Vec<DataType>),
13 | }
14 | 
15 | impl fmt::Display for DataType {
16 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
17 |         match self {
18 |             DataType::Bool => write!(f, "bool"),
19 |             DataType::Int => write!(f, "int"),
20 |             DataType::BigInt => write!(f, "bigint"),
21 |             DataType::String => write!(f, "string"),
22 |             DataType::Unknown => write!(f, "unknown"),
23 |             DataType::Any => write!(f, "any"),
24 |             DataType::Array(elem_type) => write!(f, "array({})", elem_type),
25 |             DataType::Tuple(elem_types) => {
26 |                 write!(f, "tuple(")?;
27 |                 for (i, data_type) in elem_types.iter().enumerate() {
28 |                     if i > 0 {
29 |                         write!(f, ", ")?;
30 |                     }
31 |                     write!(f, "{}", data_type)?;
32 |                 }
33 |                 write!(f, ")")
34 |             }
35 |         }
36 |     }
37 | }
38 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/project_merge.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     query_graph::{
 3 |         optimizer::{OptRuleType, SingleReplacementRule},
 4 |         NodeId, QueryGraph, QueryNode,
 5 |     },
 6 |     scalar_expr::rewrite::dereference_scalar_expr,
 7 | };
 8 | 
 9 | pub struct ProjectMergeRule {}
10 | 
11 | impl SingleReplacementRule for ProjectMergeRule {
12 |     fn rule_type(&self) -> OptRuleType {
13 |         OptRuleType::TopDown
14 |     }
15 | 
16 |     fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option<NodeId> {
17 |         if let QueryNode::Project { input, outputs } = query_graph.node(node_id) {
18 |             if let QueryNode::Project {
19 |                 input: child_input,
20 |                 outputs: child_outputs,
21 |             } = query_graph.node(*input)
22 |             {
23 |                 return Some(
24 |                     query_graph.project(
25 |                         *child_input,
26 |                         outputs
27 |                             .clone()
28 |                             .into_iter()
29 |                             .map(|x| dereference_scalar_expr(&x, &child_outputs))
30 |                             .collect(),
31 |                     ),
32 |                 );
33 |             }
34 |         }
35 |         None
36 |     }
37 | }
38 | 
--------------------------------------------------------------------------------
/src/query_graph/properties/equivalence_classes.rs:
--------------------------------------------------------------------------------
 1 | use std::{any::TypeId, rc::Rc};
 2 | 
 3 | use crate::{
 4 |     query_graph::{NodeId, QueryGraph},
 5 |     scalar_expr::equivalence_class::{extract_equivalence_classes, EquivalenceClasses},
 6 | };
 7 | 
 8 | use super::pulled_up_predicates;
 9 | 
10 | /// Property derived from the pulled up predicates.
11 | pub fn equivalence_classes(query_graph: &QueryGraph, node_id: NodeId) -> Rc<EquivalenceClasses> {
12 |     let type_id = TypeId::of::<Rc<EquivalenceClasses>>();
13 |     if let Some(cached) = query_graph
14 |         .property_cache
15 |         .borrow_mut()
16 |         .node_bottom_up_properties(node_id)
17 |         .get(&type_id)
18 |     {
19 |         return cached
20 |             .downcast_ref::<Rc<EquivalenceClasses>>()
21 |             .unwrap()
22 |             .clone();
23 |     }
24 |     // Do not use an else branch since we need to release the borrow above
25 |     // in order to compute the pulled up predicates
26 |     let predicates = pulled_up_predicates(query_graph, node_id);
27 |     let classes = Rc::new(extract_equivalence_classes(&predicates));
28 |     query_graph
29 |         .property_cache
30 |         .borrow_mut()
31 |         .node_bottom_up_properties(node_id)
32 |         .insert(type_id, Box::new(classes.clone()));
33 |     classes
34 | }
35 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/top_projection.rs:
--------------------------------------------------------------------------------
 1 | use itertools::Itertools;
 2 | 
 3 | use crate::{
 4 |     query_graph::{
 5 |         optimizer::{OptRuleType, SingleReplacementRule},
 6 |         properties::num_columns,
 7 |         NodeId, QueryGraph, QueryNode,
 8 |     },
 9 |     scalar_expr::ScalarExpr,
10 | };
11 | 
12 | /// Rule that ensures the root node of the query is a projection.
13 | ///
14 | /// Adding a projection as the top level node helps with column pruning
15 | /// as columns that are bound to other columns or constants can be pruned.
16 | pub struct TopProjectionRule {}
17 | 
18 | impl SingleReplacementRule for TopProjectionRule {
19 |     fn rule_type(&self) -> OptRuleType {
20 |         OptRuleType::RootOnly
21 |     }
22 | 
23 |     fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option<NodeId> {
24 |         if let QueryNode::Project { .. } = query_graph.node(node_id) {
25 |             None
26 |         } else {
27 |             let num_columns = num_columns(query_graph, node_id);
28 |             Some(
29 |                 query_graph.project(
30 |                     node_id,
31 |                     (0..num_columns)
32 |                         .map(|i| ScalarExpr::input_ref(i).into())
33 |                         .collect_vec(),
34 |                 ),
35 |             )
36 |         }
37 |     }
38 | }
39 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/cte_discovery.rs:
--------------------------------------------------------------------------------
 1 | use itertools::Itertools;
 2 | 
 3 | use crate::query_graph::{
 4 |     optimizer::{OptRuleType, Rule},
 5 |     NodeId, QueryGraph,
 6 | };
 7 | 
 8 | /// Finds duplicated nodes in the query graph and replaces them with the equivalent node
 9 | /// with the lowest node ID.
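///
/// A hypothetical sketch of the situation this rule targets, built with the
/// graph construction helpers used elsewhere in this crate (and assuming the
/// builder does not already deduplicate identical nodes on insertion):
///
/// ```ignore
/// use rust_sql::query_graph::*;
/// use rust_sql::scalar_expr::*;
///
/// let mut query_graph = QueryGraph::new();
/// let scan = query_graph.table_scan(1, 10);
/// let predicate: ScalarExprRef = ScalarExpr::input_ref(0)
///     .binary(
///         BinaryOp::Eq,
///         ScalarExpr::string_literal("hello".to_string()).into(),
///     )
///     .into();
/// // Two structurally identical filters, e.g. built independently by two
/// // branches of the same query.
/// let filter_a = query_graph.filter(scan, vec![predicate.clone()]);
/// let filter_b = query_graph.filter(scan, vec![predicate.clone()]);
/// // CteDiscoveryRule would report the replacement (filter_b, filter_a),
/// // making every parent of the duplicate point at the node with the lowest
/// // id, so the filter subplan becomes shared.
/// ```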
10 | pub struct CteDiscoveryRule {} 11 | 12 | impl Rule for CteDiscoveryRule { 13 | fn rule_type(&self) -> OptRuleType { 14 | OptRuleType::RootOnly 15 | } 16 | 17 | fn apply(&self, query_graph: &mut QueryGraph, _: NodeId) -> Option> { 18 | let mut node_ids = query_graph.nodes.keys().cloned().collect_vec(); 19 | node_ids.sort(); 20 | let replacements = node_ids 21 | .iter() 22 | .enumerate() 23 | .filter_map(|(i, orig_node_id)| { 24 | let node = query_graph.node(*orig_node_id); 25 | node_ids 26 | .iter() 27 | .take(i) 28 | .find(|replacement_node_id| query_graph.node(**replacement_node_id) == node) 29 | .map(|replacement_node_id| (*orig_node_id, *replacement_node_id)) 30 | }) 31 | .collect_vec(); 32 | if replacements.is_empty() { 33 | None 34 | } else { 35 | Some(replacements) 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/union_merge.rs: -------------------------------------------------------------------------------- 1 | use crate::query_graph::{ 2 | optimizer::{OptRuleType, SingleReplacementRule}, 3 | NodeId, QueryGraph, QueryNode, 4 | }; 5 | 6 | pub struct UnionMergeRule {} 7 | 8 | impl SingleReplacementRule for UnionMergeRule { 9 | fn rule_type(&self) -> OptRuleType { 10 | OptRuleType::TopDown 11 | } 12 | 13 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 14 | if let QueryNode::Union { inputs } = query_graph.node(node_id) { 15 | let mut new_inputs = inputs.clone(); 16 | let mut any = false; 17 | while let Some((idx, inputs)) = 18 | new_inputs.iter().enumerate().find_map(|(idx, input)| { 19 | if let QueryNode::Union { inputs } = query_graph.node(*input) { 20 | Some((idx, inputs.clone())) 21 | } else { 22 | None 23 | } 24 | }) 25 | { 26 | let mut flattened_union = (0..idx) 27 | .map(|i| new_inputs[i].clone()) 28 | .collect::>(); 29 | flattened_union.extend(inputs); 30 | flattened_union.extend((idx + 1..new_inputs.len()).map(|i| new_inputs[i].clone())); 31 | new_inputs = flattened_union; 32 | any = true; 33 | } 34 | if any { 35 | return Some(query_graph.add_node(QueryNode::Union { inputs: new_inputs })); 36 | } 37 | } 38 | None 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/aggregate_remove.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | query_graph::{ 3 | optimizer::{OptRuleType, SingleReplacementRule}, 4 | properties::unique_key, 5 | NodeId, QueryGraph, QueryNode, 6 | }, 7 | scalar_expr::ScalarExpr, 8 | }; 9 | 10 | pub struct AggregateRemoveRule {} 11 | 12 | impl SingleReplacementRule for AggregateRemoveRule { 13 | fn rule_type(&self) -> OptRuleType { 14 | OptRuleType::Always 15 | } 16 | 17 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 18 | if let QueryNode::Aggregate { 19 | group_key, 20 | aggregates, 21 | input, 22 | } = query_graph.node(node_id) 23 | { 24 | if !group_key.is_empty() { 25 | if let Some(input_unique_key) = unique_key(query_graph, *input) { 26 | let group_key_expr = group_key 27 | .iter() 28 | .map(|col| ScalarExpr::input_ref(*col).into()) 29 | .collect::>(); 30 | if input_unique_key.iter().all(|e| group_key_expr.contains(e)) { 31 | let mut values = group_key_expr; 32 | values.extend( 33 | aggregates 34 | .iter() 35 | .map(|aggregate| aggregate.on_unique_tuple()), 36 | ); 37 | return Some(query_graph.project(*input, values)); 38 | } 39 | } 40 | } 41 | } 42 | None 43 | } 44 | } 45 
|
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/filter_project_transpose.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     query_graph::{
 3 |         optimizer::{OptRuleType, SingleReplacementRule},
 4 |         NodeId, QueryGraph, QueryNode,
 5 |     },
 6 |     scalar_expr::rewrite::dereference_scalar_expr,
 7 | };
 8 | 
 9 | /// Given a Filter node on top of a Project node, it transposes them by creating a
10 | /// new Filter node and a new Project node on top of it.
11 | ///
12 | /// If the Project node is a shared node, i.e. it has multiple parents, the original
13 | /// Project node will still be referenced by the rest of its parents. In our model,
14 | /// we are only interested in preserving shared Joins, Aggregates and any node
15 | /// performing some expensive operation.
16 | pub struct FilterProjectTransposeRule {}
17 | 
18 | impl SingleReplacementRule for FilterProjectTransposeRule {
19 |     fn rule_type(&self) -> OptRuleType {
20 |         OptRuleType::TopDown
21 |     }
22 | 
23 |     fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option<NodeId> {
24 |         if let QueryNode::Filter { conditions, input } = query_graph.node(node_id) {
25 |             if let QueryNode::Project {
26 |                 outputs,
27 |                 input: proj_input,
28 |             } = query_graph.node(*input)
29 |             {
30 |                 let new_conditions = conditions
31 |                     .iter()
32 |                     .map(|c| dereference_scalar_expr(c, outputs))
33 |                     .collect::<Vec<_>>();
34 |                 let outputs = outputs.clone();
35 |                 let new_filter = query_graph.filter(*proj_input, new_conditions);
36 |                 return Some(query_graph.project(new_filter, outputs));
37 |             }
38 |         }
39 |         None
40 |     }
41 | }
42 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/project_normalization.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     query_graph::{
 3 |         optimizer::{OptRuleType, SingleReplacementRule},
 4 |         properties::{equivalence_classes, pulled_up_predicates},
 5 |         NodeId, QueryGraph, QueryNode,
 6 |     },
 7 |     scalar_expr::{
 8 |         equivalence_class::to_replacement_map, rewrite::replace_sub_expressions_pre, ScalarExpr,
 9 |         ScalarExprRef,
10 |     },
11 | };
12 | 
13 | /// Replace sub-expressions in a projection with the representative of the equivalence
14 | /// class they belong to.
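///
/// For instance, if a filter below the projection enforces `ref_0 = 'hello'`,
/// the literal becomes the representative of that equivalence class and any use
/// of `ref_0` in the projection is rewritten to `'hello'`. A hypothetical
/// sketch using the graph construction helpers from this crate:
///
/// ```ignore
/// use rust_sql::query_graph::*;
/// use rust_sql::scalar_expr::*;
///
/// let mut query_graph = QueryGraph::new();
/// let scan = query_graph.table_scan(1, 2);
/// let filter = query_graph.filter(
///     scan,
///     vec![ScalarExpr::input_ref(0)
///         .binary(
///             BinaryOp::Eq,
///             ScalarExpr::string_literal("hello".to_string()).into(),
///         )
///         .into()],
/// );
/// let project = query_graph.project(
///     filter,
///     vec![ScalarExpr::nary(
///         NaryOp::Concat,
///         vec![
///             ScalarExpr::input_ref(0).into(),
///             ScalarExpr::input_ref(1).into(),
///         ],
///     )
///     .into()],
/// );
/// // Applying ProjectNormalizationRule to `project` is expected to return a
/// // projection computing concat('hello', ref_1) instead.
/// ```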
15 | pub struct ProjectNormalizationRule {} 16 | 17 | impl SingleReplacementRule for ProjectNormalizationRule { 18 | fn rule_type(&self) -> OptRuleType { 19 | OptRuleType::Always 20 | } 21 | 22 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 23 | if let QueryNode::Project { outputs, input } = query_graph.node(node_id) { 24 | let classes = equivalence_classes(query_graph, *input); 25 | let predicates = pulled_up_predicates(query_graph, *input); 26 | let mut replacement_map = to_replacement_map(&classes); 27 | let true_literal: ScalarExprRef = ScalarExpr::true_literal().into(); 28 | replacement_map.extend( 29 | predicates 30 | .iter() 31 | .map(|predicate| (predicate.clone(), true_literal.clone())), 32 | ); 33 | let new_outputs = outputs 34 | .iter() 35 | .map(|expr| replace_sub_expressions_pre(expr, &replacement_map)) 36 | .collect::>(); 37 | 38 | if new_outputs.iter().zip(outputs.iter()).any(|(x, y)| x != y) { 39 | return Some(query_graph.project(*input, new_outputs)); 40 | } 41 | } 42 | None 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/identity_join.rs: -------------------------------------------------------------------------------- 1 | use crate::query_graph::{ 2 | optimizer::{OptRuleType, SingleReplacementRule}, 3 | properties::{empty_key, num_columns}, 4 | JoinType, NodeId, QueryGraph, QueryNode, 5 | }; 6 | 7 | /// Removes joins where one of the inputs is a relation always projecting a single 8 | /// row and has no columns. 9 | pub struct IdentityJoinRule; 10 | 11 | impl SingleReplacementRule for IdentityJoinRule { 12 | fn rule_type(&self) -> OptRuleType { 13 | OptRuleType::Always 14 | } 15 | 16 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 17 | if let QueryNode::Join { 18 | join_type: JoinType::Inner, 19 | conditions, 20 | left, 21 | right, 22 | } = query_graph.node(node_id) 23 | { 24 | let left_num_columns = num_columns(query_graph, *left); 25 | let right_num_columns = num_columns(query_graph, *right); 26 | let left_is_identity = left_num_columns == 0 27 | && empty_key(query_graph, *left) 28 | .and_then(|key| Some(key.lower_bound == 1 && key.upper_bound == Some(1))) 29 | .unwrap_or(false); 30 | let right_is_identity = right_num_columns == 0 31 | && empty_key(query_graph, *right) 32 | .and_then(|key| Some(key.lower_bound == 1 && key.upper_bound == Some(1))) 33 | .unwrap_or(false); 34 | let non_identity_relation = match (left_is_identity, right_is_identity) { 35 | (true, _) => Some(*right), 36 | (_, true) => Some(*left), 37 | _ => None, 38 | }; 39 | if let Some(non_identity_relation) = non_identity_relation { 40 | return Some(query_graph.filter(non_identity_relation, conditions.clone())); 41 | } 42 | } 43 | None 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/mod.rs: -------------------------------------------------------------------------------- 1 | mod aggregate_project_transpose; 2 | mod aggregate_pruning; 3 | mod aggregate_remove; 4 | mod aggregate_simplifier; 5 | mod apply_pruning; 6 | mod common_aggregate_discovery; 7 | mod cte_discovery; 8 | mod equality_propagation; 9 | mod expression_reduction; 10 | mod filter_aggregate_transpose; 11 | mod filter_apply_transpose; 12 | mod filter_join_transpose; 13 | mod filter_merge; 14 | mod filter_normalization; 15 | mod filter_project_transpose; 16 | mod identity_join; 17 | mod join_project_transpose; 18 | mod 
join_pruning; 19 | mod outer_to_inner_join; 20 | mod project_merge; 21 | mod project_normalization; 22 | mod prune_aggregate_input; 23 | mod remove_passthrough_project; 24 | mod top_projection; 25 | mod union_merge; 26 | mod union_pruning; 27 | 28 | pub use aggregate_project_transpose::AggregateProjectTransposeRule; 29 | pub use aggregate_pruning::AggregatePruningRule; 30 | pub use aggregate_remove::AggregateRemoveRule; 31 | pub use aggregate_simplifier::AggregateSimplifierRule; 32 | pub use apply_pruning::ApplyPruningRule; 33 | pub use common_aggregate_discovery::CommonAggregateDiscoveryRule; 34 | pub use cte_discovery::CteDiscoveryRule; 35 | pub use equality_propagation::EqualityPropagationRule; 36 | pub use expression_reduction::ExpressionReductionRule; 37 | pub use filter_aggregate_transpose::FilterAggregateTransposeRule; 38 | pub use filter_apply_transpose::FilterApplyTransposeRule; 39 | pub use filter_join_transpose::FilterJoinTransposeRule; 40 | pub use filter_merge::FilterMergeRule; 41 | pub use filter_normalization::FilterNormalizationRule; 42 | pub use filter_project_transpose::FilterProjectTransposeRule; 43 | pub use identity_join::IdentityJoinRule; 44 | pub use join_project_transpose::JoinProjectTransposeRule; 45 | pub use join_pruning::JoinPruningRule; 46 | pub use outer_to_inner_join::OuterToInnerJoinRule; 47 | pub use project_merge::ProjectMergeRule; 48 | pub use project_normalization::ProjectNormalizationRule; 49 | pub use prune_aggregate_input::PruneAggregateInputRule; 50 | pub use remove_passthrough_project::RemovePassthroughProjectRule; 51 | pub use top_projection::TopProjectionRule; 52 | pub use union_merge::UnionMergeRule; 53 | pub use union_pruning::UnionPruningRule; 54 | -------------------------------------------------------------------------------- /src/query_graph/properties/input_dependencies.rs: -------------------------------------------------------------------------------- 1 | use std::{any::TypeId, collections::HashSet, rc::Rc}; 2 | 3 | use crate::{ 4 | query_graph::{NodeId, QueryGraph, QueryNode}, 5 | scalar_expr::visitor::store_input_dependencies, 6 | }; 7 | 8 | use super::num_columns; 9 | 10 | struct InputDependenciesTag; 11 | 12 | pub fn input_dependencies(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 13 | let type_id = TypeId::of::(); 14 | if let Some(cached) = query_graph 15 | .property_cache 16 | .borrow_mut() 17 | .single_node_properties(node_id) 18 | .get(&type_id) 19 | { 20 | return cached.downcast_ref::>>().unwrap().clone(); 21 | } 22 | let mut dependencies = HashSet::new(); 23 | match query_graph.node(node_id) { 24 | QueryNode::QueryRoot { input } => { 25 | if let Some(input) = input { 26 | dependencies.extend(0..num_columns(query_graph, *input)); 27 | } 28 | } 29 | QueryNode::Project { outputs: exprs, .. } 30 | | QueryNode::Join { 31 | conditions: exprs, .. 32 | } => exprs 33 | .iter() 34 | .for_each(|e| store_input_dependencies(e, &mut dependencies)), 35 | QueryNode::TableScan { .. } => {} 36 | QueryNode::Aggregate { 37 | group_key, 38 | aggregates, 39 | .. 40 | } => { 41 | dependencies.extend(group_key.iter()); 42 | for aggregate in aggregates.iter() { 43 | dependencies.extend(aggregate.operands.iter()); 44 | } 45 | } 46 | QueryNode::Filter { 47 | conditions: exprs, .. 48 | } => exprs 49 | .iter() 50 | .for_each(|e| store_input_dependencies(e, &mut dependencies)), 51 | QueryNode::Union { .. } | QueryNode::SubqueryRoot { .. } | QueryNode::Apply { .. 
} => { 52 | dependencies.extend(0..num_columns(query_graph, node_id)) 53 | } 54 | } 55 | let dependencies = Rc::new(dependencies); 56 | query_graph 57 | .property_cache 58 | .borrow_mut() 59 | .single_node_properties(node_id) 60 | .insert(type_id, Box::new(dependencies.clone())); 61 | dependencies 62 | } 63 | -------------------------------------------------------------------------------- /tools/vis.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 17 | 18 | 19 |
20 | 21 | 22 | 73 | 74 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/union_pruning.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{ 6 | utils::{ 7 | apply_map_to_parents_and_replace_input, required_columns_from_parents, 8 | required_columns_to_column_map, 9 | }, 10 | OptRuleType, Rule, 11 | }, 12 | NodeId, QueryGraph, QueryNode, 13 | }, 14 | scalar_expr::ScalarExpr, 15 | }; 16 | 17 | /// Rule that given a shared union where all its parents are pruning projections, computes 18 | /// the superset of columns required by all its parents, and prunes the columns not used 19 | /// by any of them, replacing the parents of the union with projections over the pruned 20 | /// union. A pruning projection is inserted under each branch of the pruned union. 21 | pub struct UnionPruningRule {} 22 | 23 | impl Rule for UnionPruningRule { 24 | fn rule_type(&self) -> OptRuleType { 25 | OptRuleType::TopDown 26 | } 27 | 28 | fn apply( 29 | &self, 30 | query_graph: &mut QueryGraph, 31 | node_id: NodeId, 32 | ) -> Option> { 33 | if let QueryNode::Union { inputs } = query_graph.node(node_id) { 34 | if let Some(required_columns) = required_columns_from_parents(query_graph, node_id) { 35 | // Prune the branches 36 | let column_map = required_columns_to_column_map(&required_columns); 37 | let proj = column_map 38 | .iter() 39 | .map(|(i, _)| *i) 40 | .sorted() 41 | .map(|i| ScalarExpr::InputRef { index: i }.into()) 42 | .collect::>(); 43 | let new_inputs = inputs 44 | .clone() // clone to make the borrow checker happy 45 | .iter() 46 | .map(|input| query_graph.project(*input, proj.clone())) 47 | .collect(); 48 | let new_union = query_graph.add_node(QueryNode::Union { inputs: new_inputs }); 49 | 50 | // Rewrite the parent projections 51 | return Some(apply_map_to_parents_and_replace_input( 52 | query_graph, 53 | node_id, 54 | &column_map, 55 | new_union, 56 | )); 57 | } 58 | } 59 | None 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/bin/dag.rs: -------------------------------------------------------------------------------- 1 | use rust_sql::query_graph::optimizer::OptimizerContext; 2 | use rust_sql::query_graph::optimizer::DEFAULT_OPTIMIZER; 3 | use rust_sql::query_graph::*; 4 | use rust_sql::scalar_expr::*; 5 | 6 | fn main() { 7 | let mut query_graph = { 8 | let mut query_graph = QueryGraph::new(); 9 | // select col0, col2 from (select col0, col9, col2 || col4 from (select * from table_1 where col0 = 'hello') where col5 = 'world') 10 | let table_scan_1 = query_graph.table_scan(1, 10); 11 | let filter_1 = query_graph.filter( 12 | table_scan_1, 13 | vec![ScalarExpr::input_ref(0) 14 | .binary( 15 | BinaryOp::Eq, 16 | ScalarExpr::string_literal("hello".to_string()).into(), 17 | ) 18 | .into()], 19 | ); 20 | let filter_2 = query_graph.filter( 21 | filter_1, 22 | vec![ScalarExpr::input_ref(5) 23 | .binary( 24 | BinaryOp::Eq, 25 | ScalarExpr::string_literal("world".to_string()).into(), 26 | ) 27 | .into()], 28 | ); 29 | let project_1 = query_graph.project( 30 | filter_2, 31 | vec![ 32 | ScalarExpr::input_ref(0).into(), 33 | ScalarExpr::input_ref(9).into(), 34 | ScalarExpr::nary( 35 | NaryOp::Concat, 36 | vec![ 37 | ScalarExpr::input_ref(2).into(), 38 | ScalarExpr::input_ref(4).into(), 39 | ], 40 | ) 41 | .into(), 42 | ], 43 | ); 44 | let project_2 = query_graph.project( 45 | 
project_1, 46 | vec![ 47 | ScalarExpr::input_ref(0).into(), 48 | ScalarExpr::input_ref(2).into(), 49 | ], 50 | ); 51 | query_graph.set_entry_node(project_2); 52 | query_graph 53 | }; 54 | 55 | let optimizer = &DEFAULT_OPTIMIZER; 56 | 57 | println!("Before:\n\n{}", query_graph.fully_annotated_explain()); 58 | 59 | println!("Before:\n\n{}", query_graph.explain()); 60 | let mut opt_context = OptimizerContext::new(); 61 | optimizer.optimize(&mut opt_context, &mut query_graph); 62 | println!("After:\n\n{}", query_graph.explain()); 63 | 64 | query_graph.garbage_collect(); 65 | println!("After:\n\n{}", query_graph.explain()); 66 | } 67 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_aggregate_transpose.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{utils::common_parent_filters, OptRuleType, SingleReplacementRule}, 6 | properties::pulled_up_predicates, 7 | NodeId, QueryGraph, QueryNode, 8 | }, 9 | scalar_expr::rewrite::{apply_column_map, to_column_map_for_expr_push_down}, 10 | }; 11 | 12 | pub struct FilterAggregateTransposeRule {} 13 | 14 | impl SingleReplacementRule for FilterAggregateTransposeRule { 15 | fn rule_type(&self) -> OptRuleType { 16 | OptRuleType::TopDown 17 | } 18 | 19 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 20 | if let QueryNode::Aggregate { 21 | group_key, 22 | aggregates, 23 | input: agg_input, 24 | } = query_graph.node(node_id) 25 | { 26 | if let Some(conditions) = common_parent_filters(query_graph, node_id) { 27 | let column_map = to_column_map_for_expr_push_down(group_key); 28 | let known_predicates = pulled_up_predicates(query_graph, *agg_input); 29 | let pushable_conditions = conditions 30 | .iter() 31 | .enumerate() 32 | .filter_map(|(i, expr)| { 33 | if let Some(condition) = apply_column_map(expr, &column_map) { 34 | if !known_predicates.contains(&condition) { 35 | return Some((i, condition)); 36 | } 37 | } 38 | None 39 | }) 40 | .collect::>(); 41 | 42 | if !pushable_conditions.is_empty() { 43 | let new_group_key = group_key.clone(); 44 | let new_aggregates = aggregates.clone(); 45 | let new_filter = query_graph.filter( 46 | *agg_input, 47 | pushable_conditions 48 | .iter() 49 | .map(|(_, expr)| expr.clone()) 50 | .collect::>(), 51 | ); 52 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 53 | group_key: new_group_key, 54 | aggregates: new_aggregates, 55 | input: new_filter, 56 | }); 57 | 58 | return Some(new_aggregate); 59 | } 60 | } 61 | } 62 | None 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/prune_aggregate_input.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeSet, HashMap}; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{OptRuleType, SingleReplacementRule}, 8 | properties::{input_dependencies, num_columns}, 9 | NodeId, QueryGraph, QueryNode, 10 | }, 11 | scalar_expr::{AggregateExpr, ScalarExpr}, 12 | }; 13 | 14 | /// Given an aggregate node not using all the columns from its input, it inserts 15 | /// a pruning projection and replaces it with a new aggregation over the pruning 16 | /// projection. 
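///
/// A hypothetical sketch (assuming, as in the other rules in this module, that
/// the grouping key is a collection of input column indexes):
///
/// ```ignore
/// use rust_sql::query_graph::*;
///
/// let mut query_graph = QueryGraph::new();
/// let scan = query_graph.table_scan(1, 10);
/// // Group by column 9 of a 10-column input: only one column is actually needed.
/// let aggregate = query_graph.add_node(QueryNode::Aggregate {
///     group_key: (9..10).collect(),
///     aggregates: vec![],
///     input: scan,
/// });
/// // PruneAggregateInputRule is expected to rewrite this as an aggregate with
/// // group_key [0] over project(scan, [ref_9]).
/// ```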
17 | pub struct PruneAggregateInputRule {} 18 | 19 | impl SingleReplacementRule for PruneAggregateInputRule { 20 | fn rule_type(&self) -> OptRuleType { 21 | OptRuleType::TopDown 22 | } 23 | 24 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 25 | if let QueryNode::Aggregate { 26 | group_key, 27 | aggregates, 28 | input, 29 | } = query_graph.node(node_id) 30 | { 31 | let num_columns = num_columns(query_graph, *input); 32 | let input_dependencies = input_dependencies(query_graph, node_id); 33 | if num_columns != input_dependencies.len() { 34 | let column_map = input_dependencies 35 | .iter() 36 | .sorted() 37 | .enumerate() 38 | .map(|(i, j)| (*j, i)) 39 | .collect::>(); 40 | let new_group_key = group_key 41 | .iter() 42 | .map(|k| *column_map.get(k).unwrap()) 43 | .collect::>(); 44 | let new_aggregates = aggregates 45 | .iter() 46 | .map(|k| { 47 | AggregateExpr { 48 | op: k.op.clone(), 49 | operands: k 50 | .operands 51 | .iter() 52 | .map(|e| *column_map.get(e).unwrap()) 53 | .collect_vec(), 54 | } 55 | .into() 56 | }) 57 | .collect_vec(); 58 | let project_outputs = input_dependencies 59 | .iter() 60 | .sorted() 61 | .map(|i| ScalarExpr::input_ref(*i).into()) 62 | .collect(); 63 | 64 | let pruning_project = query_graph.project(*input, project_outputs); 65 | return Some(query_graph.add_node(QueryNode::Aggregate { 66 | group_key: new_group_key, 67 | aggregates: new_aggregates, 68 | input: pruning_project, 69 | })); 70 | } 71 | } 72 | None 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/aggregate_pruning.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::query_graph::{ 4 | optimizer::{ 5 | utils::{ 6 | apply_map_to_parents_and_replace_input, required_columns_from_parents, 7 | required_columns_to_column_map, 8 | }, 9 | OptRuleType, Rule, 10 | }, 11 | properties::num_columns, 12 | NodeId, QueryGraph, QueryNode, 13 | }; 14 | 15 | /// Rule that given a shared aggregate where all its parents are pruning projections, computes 16 | /// the superset of columns required by all its parents, and prunes the columns not used 17 | /// by any of them, replacing the parents of the aggregate with projections over the pruned 18 | /// aggregate. 19 | /// Only aggregate expressions can be pruned. 
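///
/// A hypothetical before/after sketch (plan notation only, not real syntax):
///
/// ```ignore
/// // Before: output column 2 (the second aggregate) is not referenced by any
/// // parent projection of the shared aggregate.
/// //   project_a(agg, [ref_0])    project_b(agg, [ref_1])
/// //   agg = Aggregate { group_key: [0], aggregates: [agg_x, agg_y], input }
/// //
/// // After applying AggregatePruningRule:
/// //   project_a(agg', [ref_0])   project_b(agg', [ref_1])
/// //   agg' = Aggregate { group_key: [0], aggregates: [agg_x], input }
/// ```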
20 | pub struct AggregatePruningRule {} 21 | 22 | impl Rule for AggregatePruningRule { 23 | fn rule_type(&self) -> OptRuleType { 24 | OptRuleType::TopDown 25 | } 26 | 27 | fn apply( 28 | &self, 29 | query_graph: &mut QueryGraph, 30 | node_id: NodeId, 31 | ) -> Option> { 32 | if let QueryNode::Aggregate { 33 | group_key, 34 | aggregates, 35 | input, 36 | } = query_graph.node(node_id) 37 | { 38 | if let Some(mut required_columns) = required_columns_from_parents(query_graph, node_id) 39 | { 40 | // All the columns from the grouping key are implicitly required 41 | required_columns.extend(0..group_key.len()); 42 | let num_columns = num_columns(query_graph, node_id); 43 | if required_columns.len() == num_columns { 44 | return None; 45 | } 46 | let new_group_key = group_key.clone(); 47 | let new_aggregates = aggregates 48 | .iter() 49 | .enumerate() 50 | .filter(|(i, _)| { 51 | let col_offset = group_key.len() + i; 52 | required_columns.contains(&col_offset) 53 | }) 54 | .map(|(_, e)| e.clone()) 55 | .collect_vec(); 56 | assert_ne!(new_aggregates.len(), aggregates.len()); 57 | let new_input = *input; 58 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 59 | group_key: new_group_key, 60 | aggregates: new_aggregates, 61 | input: new_input, 62 | }); 63 | 64 | // Rewrite the parent projections 65 | let column_map = required_columns_to_column_map(&required_columns); 66 | return Some(apply_map_to_parents_and_replace_input( 67 | query_graph, 68 | node_id, 69 | &column_map, 70 | new_aggregate, 71 | )); 72 | } 73 | } 74 | None 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/expression_reduction.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{OptRuleType, SingleReplacementRule}, 6 | properties::row_type, 7 | NodeId, QueryGraph, QueryNode, 8 | }, 9 | scalar_expr::reduction::reduce_and_prune_exists_subplans_recursively, 10 | }; 11 | 12 | pub struct ExpressionReductionRule; 13 | 14 | impl SingleReplacementRule for ExpressionReductionRule { 15 | fn rule_type(&self) -> OptRuleType { 16 | OptRuleType::Always 17 | } 18 | 19 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 20 | let new_node = match query_graph.node(node_id) { 21 | QueryNode::Project { outputs, input } => { 22 | let row_type = row_type(query_graph, *input); 23 | let input = *input; 24 | let mut outputs = outputs.clone(); 25 | outputs.iter_mut().for_each(|e| { 26 | *e = reduce_and_prune_exists_subplans_recursively(e, query_graph, &row_type) 27 | }); 28 | query_graph.project(input, outputs) 29 | } 30 | QueryNode::Filter { conditions, input } => { 31 | let row_type = row_type(query_graph, *input); 32 | let input = *input; 33 | let mut conditions = conditions.clone(); 34 | conditions.iter_mut().for_each(|e| { 35 | *e = reduce_and_prune_exists_subplans_recursively(e, query_graph, &row_type) 36 | }); 37 | query_graph.filter(input, conditions) 38 | } 39 | QueryNode::Join { 40 | join_type, 41 | conditions, 42 | left, 43 | right, 44 | } => { 45 | let left_row_type = row_type(query_graph, *left); 46 | let right_row_type = row_type(query_graph, *right); 47 | let row_type = left_row_type 48 | .iter() 49 | .chain(right_row_type.iter()) 50 | .cloned() 51 | .collect_vec(); 52 | let left = *left; 53 | let right = *right; 54 | let join_type = join_type.clone(); 55 | let mut conditions = conditions.clone(); 56 | 
conditions.iter_mut().for_each(|e| { 57 | *e = reduce_and_prune_exists_subplans_recursively(e, query_graph, &row_type) 58 | }); 59 | query_graph.add_node(QueryNode::Join { 60 | join_type, 61 | conditions, 62 | left, 63 | right, 64 | }) 65 | } 66 | _ => node_id, 67 | }; 68 | // Note: the graph may contain duplicated nodes as a result of input 69 | // replacements. 70 | if new_node != node_id && query_graph.node(new_node) != query_graph.node(node_id) { 71 | Some(new_node) 72 | } else { 73 | None 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_merge.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::query_graph::{ 4 | optimizer::{OptRuleType, SingleReplacementRule}, 5 | NodeId, QueryGraph, QueryNode, 6 | }; 7 | 8 | /// Optimization rule that fuses two chained Filter nodes, concatenating the filter expressions 9 | /// they contain. 10 | pub struct FilterMergeRule {} 11 | 12 | impl SingleReplacementRule for FilterMergeRule { 13 | fn rule_type(&self) -> OptRuleType { 14 | OptRuleType::TopDown 15 | } 16 | 17 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 18 | if let QueryNode::Filter { conditions, input } = query_graph.node(node_id) { 19 | if let QueryNode::Filter { 20 | conditions: child_conditions, 21 | input: child_input, 22 | } = query_graph.node(*input) 23 | { 24 | let conditions = conditions 25 | .iter() 26 | .cloned() 27 | .chain(child_conditions.iter().cloned()) 28 | .collect_vec(); 29 | let new_input = *child_input; 30 | return Some(query_graph.filter(new_input, conditions)); 31 | } 32 | } 33 | None 34 | } 35 | } 36 | 37 | #[cfg(test)] 38 | mod tests { 39 | use crate::{ 40 | query_graph::QueryGraph, 41 | query_graph::{optimizer::SingleReplacementRule, QueryNode}, 42 | scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef}, 43 | }; 44 | 45 | use super::FilterMergeRule; 46 | 47 | #[test] 48 | fn test_filter_merge() { 49 | let mut query_graph = QueryGraph::new(); 50 | let table_scan_id = query_graph.table_scan(0, 10); 51 | let project_id = query_graph.project( 52 | table_scan_id, 53 | (0..10).map(|i| ScalarExpr::input_ref(i).into()).collect(), 54 | ); 55 | 56 | let filter_1: ScalarExprRef = ScalarExpr::input_ref(0) 57 | .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into()) 58 | .into(); 59 | let filter_id_1 = query_graph.filter(project_id, vec![filter_1.clone()]); 60 | let filter_2: ScalarExprRef = ScalarExpr::input_ref(2) 61 | .binary(BinaryOp::Gt, ScalarExpr::input_ref(3).into()) 62 | .into(); 63 | let filter_id_2 = query_graph.filter(filter_id_1, vec![filter_2.clone()]); 64 | query_graph.set_entry_node(filter_id_2); 65 | 66 | let filter_merge_rule = FilterMergeRule {}; 67 | assert!(filter_merge_rule 68 | .apply(&mut query_graph, project_id) 69 | .is_none()); 70 | 71 | assert!(filter_merge_rule 72 | .apply(&mut query_graph, filter_id_1) 73 | .is_none()); 74 | 75 | let merged_filter_id = filter_merge_rule 76 | .apply(&mut query_graph, filter_id_2) 77 | .unwrap(); 78 | if let QueryNode::Filter { 79 | input, conditions, .. 
80 | } = query_graph.node(merged_filter_id) 81 | { 82 | assert_eq!(*input, project_id); 83 | assert_eq!(*conditions, vec![filter_2, filter_1]); 84 | } else { 85 | panic!(); 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_normalization.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{OptRuleType, SingleReplacementRule}, 6 | properties::{equivalence_classes, pulled_up_predicates}, 7 | NodeId, QueryGraph, QueryNode, 8 | }, 9 | scalar_expr::{ 10 | equivalence_class::to_replacement_map, rewrite::replace_sub_expressions_pre, ScalarExpr, 11 | ScalarExprRef, 12 | }, 13 | }; 14 | 15 | /// Rule that, among other things, removes filter nodes, either partially or fully, enforcing 16 | /// predicates that are already enforced by some descendent node. 17 | /// 18 | /// Expressions are normalized so that each sub-expression is replaced with the representative 19 | /// of their class, if any. For example, if we know that `'hello'` and `ref_1` belong to the 20 | /// same equivalence class, then we can replace any appearance of `ref_1` with `'hello'` literal 21 | /// as literals come before input references. 22 | /// 23 | /// Finally, it removes TRUE conditions from filter nodes. 24 | pub struct FilterNormalizationRule {} 25 | 26 | impl SingleReplacementRule for FilterNormalizationRule { 27 | fn rule_type(&self) -> OptRuleType { 28 | OptRuleType::Always 29 | } 30 | 31 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 32 | if let QueryNode::Filter { conditions, input } = query_graph.node(node_id) { 33 | let classes = equivalence_classes(query_graph, *input); 34 | let predicates = pulled_up_predicates(query_graph, *input); 35 | let mut replacement_map = to_replacement_map(&classes); 36 | let true_literal: ScalarExprRef = ScalarExpr::true_literal().into(); 37 | // Anything that is already enforced by a descendent node, can be assumed 38 | // to be true. 39 | replacement_map.extend( 40 | predicates 41 | .iter() 42 | .map(|predicate| (predicate.clone(), true_literal.clone())), 43 | ); 44 | // [A = 1, B = 1 OR A = 1] results in [A = 1, B = 1 OR TRUE] which will 45 | // be later reduced to just [A = 1]. 46 | let mut new_conditions = conditions.clone(); 47 | for i in 0..new_conditions.len() { 48 | let mut replacement_map = replacement_map.clone(); 49 | replacement_map.extend( 50 | new_conditions 51 | .iter() 52 | .enumerate() 53 | .filter(|(j, _)| i != *j) 54 | .map(|(_, e)| (e.clone(), true_literal.clone())), 55 | ); 56 | new_conditions[i] = 57 | replace_sub_expressions_pre(&new_conditions[i], &replacement_map); 58 | } 59 | // TODO(asenac) reduce expressions after applying the replacements. All of the above 60 | // could be part of the reduction of AND expressions. 
61 | let new_conditions = new_conditions 62 | .into_iter() 63 | .filter(|e| *e != true_literal) 64 | .sorted() 65 | .dedup() 66 | .collect_vec(); 67 | 68 | if new_conditions != *conditions { 69 | return Some(query_graph.filter(*input, new_conditions)); 70 | } 71 | } 72 | None 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_apply_transpose.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | query_graph::{ 3 | optimizer::{utils::common_parent_filters, OptRuleType, SingleReplacementRule}, 4 | properties::{num_columns, pulled_up_predicates}, 5 | ApplyType, NodeId, QueryGraph, QueryNode, 6 | }, 7 | scalar_expr::{rewrite::shift_left_input_refs, visitor::collect_input_dependencies}, 8 | }; 9 | 10 | /// Rule that pushes filters through apply. 11 | /// 12 | /// Collects the common filter among all the parents of the apply and, pushes down those 13 | /// only referring to one apply input. 14 | pub struct FilterApplyTransposeRule {} 15 | 16 | impl SingleReplacementRule for FilterApplyTransposeRule { 17 | fn rule_type(&self) -> OptRuleType { 18 | OptRuleType::TopDown 19 | } 20 | 21 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 22 | if let QueryNode::Apply { 23 | apply_type, 24 | left, 25 | right, 26 | correlation, 27 | } = query_graph.node(node_id) 28 | { 29 | if let Some(common_conditions) = common_parent_filters(query_graph, node_id) { 30 | let left_num_columns = num_columns(query_graph, *left); 31 | let mut left_predicates = Vec::new(); 32 | let mut right_predicates = Vec::new(); 33 | 34 | let known_predicates = pulled_up_predicates(query_graph, node_id); 35 | let allowed_right_pushdown = match apply_type { 36 | ApplyType::Inner => true, 37 | ApplyType::LeftOuter => false, 38 | }; 39 | 40 | for condition in common_conditions.iter() { 41 | if known_predicates.contains(condition) { 42 | // Skip those already known to be enforced either 43 | // by any descendent node. 44 | continue; 45 | } 46 | let dependencies = collect_input_dependencies(condition); 47 | if !dependencies.is_empty() { 48 | if dependencies.iter().all(|x| *x < left_num_columns) { 49 | left_predicates.push(condition.clone()); 50 | } else if allowed_right_pushdown 51 | && dependencies.iter().all(|x| *x >= left_num_columns) 52 | { 53 | right_predicates 54 | .push(shift_left_input_refs(condition, left_num_columns)); 55 | } 56 | } 57 | } 58 | 59 | if !left_predicates.is_empty() || !right_predicates.is_empty() { 60 | let correlation = correlation.clone(); 61 | let left = *left; 62 | let right = *right; 63 | let apply_type = *apply_type; 64 | let left = query_graph.filter(left, left_predicates); 65 | let right = query_graph.filter(right, right_predicates); 66 | 67 | return Some(query_graph.add_node(QueryNode::Apply { 68 | correlation, 69 | left, 70 | right, 71 | apply_type, 72 | })); 73 | } 74 | } 75 | } 76 | None 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/query_graph/properties/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module contains the different properties that can be computed from the query graph. 2 | //! 3 | //! Most of these properties are computed bottom-up and contain a lot of boilerplate code that 4 | //! could make use of some generics. 5 | //! 
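//!
//! A sketch of that boilerplate, following the pattern used by
//! `equivalence_classes.rs` (`MyProperty` and `compute_my_property` are
//! placeholders for illustration only):
//!
//! ```ignore
//! pub fn my_property(query_graph: &QueryGraph, node_id: NodeId) -> Rc<MyProperty> {
//!     let type_id = TypeId::of::<Rc<MyProperty>>();
//!     if let Some(cached) = query_graph
//!         .property_cache
//!         .borrow_mut()
//!         .node_bottom_up_properties(node_id)
//!         .get(&type_id)
//!     {
//!         return cached.downcast_ref::<Rc<MyProperty>>().unwrap().clone();
//!     }
//!     // Release the borrow before visiting the inputs, then compute and cache.
//!     let property = Rc::new(compute_my_property(query_graph, node_id));
//!     query_graph
//!         .property_cache
//!         .borrow_mut()
//!         .node_bottom_up_properties(node_id)
//!         .insert(type_id, Box::new(property.clone()));
//!     property
//! }
//! ```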
6 | use std::{ 7 | any::{Any, TypeId}, 8 | collections::HashMap, 9 | }; 10 | 11 | use crate::query_graph::NodeId; 12 | 13 | mod column_provenance; 14 | mod correlated_input_refs; 15 | mod equivalence_classes; 16 | mod input_dependencies; 17 | mod keys; 18 | mod num_columns; 19 | mod pulled_up_predicates; 20 | mod row_type; 21 | mod subqueries; 22 | 23 | pub use column_provenance::column_provenance; 24 | pub use column_provenance::ColumnProvenanceInfo; 25 | pub use correlated_input_refs::node_correlated_input_refs; 26 | pub use correlated_input_refs::subgraph_correlated_input_refs; 27 | pub use correlated_input_refs::subgraph_correlated_input_refs_annotator; 28 | pub use equivalence_classes::equivalence_classes; 29 | pub use input_dependencies::input_dependencies; 30 | pub use keys::empty_key; 31 | pub use keys::is_empty_relation; 32 | pub use keys::keys; 33 | pub use keys::keys_annotator; 34 | pub use keys::unique_key; 35 | pub use num_columns::num_columns; 36 | pub use num_columns::num_columns_annotator; 37 | pub use pulled_up_predicates::pulled_up_predicates; 38 | pub use pulled_up_predicates::pulled_up_predicates_annotator; 39 | pub use row_type::cross_product_row_type; 40 | pub use row_type::row_type; 41 | pub use row_type::row_type_annotator; 42 | pub use subqueries::subgraph_subqueries; 43 | pub use subqueries::subqueries; 44 | 45 | use super::QueryGraph; 46 | 47 | /// Annotators used for explaining query plans. 48 | pub fn default_annotators() -> Vec<&'static dyn Fn(&QueryGraph, NodeId) -> Option> { 49 | vec![ 50 | &num_columns_annotator, 51 | &row_type_annotator, 52 | &pulled_up_predicates_annotator, 53 | &keys_annotator, 54 | &subgraph_correlated_input_refs_annotator, 55 | ] 56 | } 57 | 58 | /// Cache for compute properties 59 | pub struct PropertyCache { 60 | /// Properties computed in a bottom-up manner. 61 | bottom_up_properties: HashMap>>, 62 | /// Properties computed only from the node itself 63 | single_node_properties: HashMap>>, 64 | } 65 | 66 | impl PropertyCache { 67 | pub fn new() -> Self { 68 | Self { 69 | bottom_up_properties: HashMap::new(), 70 | single_node_properties: HashMap::new(), 71 | } 72 | } 73 | 74 | pub fn node_bottom_up_properties( 75 | &mut self, 76 | node_id: NodeId, 77 | ) -> &mut HashMap> { 78 | self.bottom_up_properties 79 | .entry(node_id) 80 | .or_insert_with(|| HashMap::new()) 81 | } 82 | 83 | /// Properties computed using only information contained in the node. 
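    /// For example, `input_dependencies` is cached here rather than in the
    /// bottom-up cache.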
84 | pub fn single_node_properties( 85 | &mut self, 86 | node_id: NodeId, 87 | ) -> &mut HashMap> { 88 | self.single_node_properties 89 | .entry(node_id) 90 | .or_insert_with(|| HashMap::new()) 91 | } 92 | 93 | pub fn invalidate_bottom_up_properties(&mut self, node_id: NodeId) { 94 | self.bottom_up_properties.remove(&node_id); 95 | } 96 | 97 | pub fn invalidate_single_node_properties(&mut self, node_id: NodeId) { 98 | self.single_node_properties.remove(&node_id); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/remove_passthrough_project.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | query_graph::{ 3 | optimizer::{OptRuleType, SingleReplacementRule}, 4 | properties::num_columns, 5 | NodeId, QueryGraph, QueryNode, 6 | }, 7 | scalar_expr::ScalarExpr, 8 | }; 9 | 10 | pub struct RemovePassthroughProjectRule {} 11 | 12 | impl SingleReplacementRule for RemovePassthroughProjectRule { 13 | fn rule_type(&self) -> OptRuleType { 14 | OptRuleType::Always 15 | } 16 | 17 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 18 | if query_graph 19 | .get_parents(node_id) 20 | .map(|parents| parents.contains(&QueryGraph::ROOT_NODE_ID)) 21 | .unwrap_or(false) 22 | { 23 | return None; 24 | } 25 | if let QueryNode::Project { outputs, input } = query_graph.node(node_id) { 26 | if query_graph.num_parents(node_id) > 0 27 | && outputs.len() == num_columns(query_graph, *input) 28 | && outputs 29 | .iter() 30 | .enumerate() 31 | .all(|(id, expr)| match expr.as_ref() { 32 | ScalarExpr::InputRef { index } => *index == id, 33 | _ => false, 34 | }) 35 | { 36 | return Some(*input); 37 | } 38 | } 39 | None 40 | } 41 | } 42 | 43 | #[cfg(test)] 44 | mod tests { 45 | use crate::{ 46 | query_graph::optimizer::SingleReplacementRule, 47 | query_graph::QueryGraph, 48 | scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef}, 49 | }; 50 | 51 | use super::RemovePassthroughProjectRule; 52 | 53 | #[test] 54 | fn test_remove_passthrough_project() { 55 | let mut query_graph = QueryGraph::new(); 56 | let table_scan_id = query_graph.table_scan(0, 10); 57 | let filter_1: ScalarExprRef = ScalarExpr::input_ref(0) 58 | .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into()) 59 | .into(); 60 | let filter_id = query_graph.filter(table_scan_id, vec![filter_1.clone()]); 61 | let project_id_1 = query_graph.project( 62 | filter_id, 63 | (0..10).map(|i| ScalarExpr::input_ref(i).into()).collect(), 64 | ); 65 | let project_id_2 = query_graph.project( 66 | filter_id, 67 | (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(), 68 | ); 69 | let project_id_3 = query_graph.project( 70 | project_id_1, 71 | (0..10).map(|i| ScalarExpr::input_ref(i).into()).collect(), 72 | ); 73 | let project_id_4 = query_graph.project( 74 | project_id_2, 75 | (0..10).map(|i| ScalarExpr::input_ref(i).into()).collect(), 76 | ); 77 | 78 | let remove_passthrough_project = RemovePassthroughProjectRule {}; 79 | assert!(remove_passthrough_project 80 | .apply(&mut query_graph, filter_id) 81 | .is_none()); 82 | assert_eq!( 83 | remove_passthrough_project 84 | .apply(&mut query_graph, project_id_1) 85 | .unwrap(), 86 | filter_id 87 | ); 88 | assert!(remove_passthrough_project 89 | .apply(&mut query_graph, project_id_2) 90 | .is_none()); 91 | 92 | assert!(remove_passthrough_project 93 | .apply(&mut query_graph, project_id_3) 94 | .is_none()); 95 | assert!(remove_passthrough_project 96 | .apply(&mut query_graph, 
project_id_4) 97 | .is_none()); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_join_transpose.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | query_graph::{ 3 | optimizer::{utils::common_parent_filters, OptRuleType, SingleReplacementRule}, 4 | properties::{num_columns, pulled_up_predicates}, 5 | JoinType, NodeId, QueryGraph, QueryNode, 6 | }, 7 | scalar_expr::{rewrite::shift_left_input_refs, visitor::collect_input_dependencies}, 8 | }; 9 | 10 | /// Rule that pushes filters through join. 11 | /// 12 | /// Collects the common filter among all the parents of the join and, pushes down those 13 | /// only referring to one join input. 14 | pub struct FilterJoinTransposeRule {} 15 | 16 | impl SingleReplacementRule for FilterJoinTransposeRule { 17 | fn rule_type(&self) -> OptRuleType { 18 | OptRuleType::TopDown 19 | } 20 | 21 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 22 | if let QueryNode::Join { 23 | join_type, 24 | left, 25 | right, 26 | conditions, 27 | } = query_graph.node(node_id) 28 | { 29 | if let Some(common_conditions) = common_parent_filters(query_graph, node_id) { 30 | let left_num_columns = num_columns(query_graph, *left); 31 | let mut left_predicates = Vec::new(); 32 | let mut right_predicates = Vec::new(); 33 | 34 | let known_predicates = pulled_up_predicates(query_graph, node_id); 35 | 36 | let allowed_left_pushdown = match join_type { 37 | JoinType::Semi | JoinType::Anti | JoinType::Inner | JoinType::LeftOuter => true, 38 | JoinType::RightOuter | JoinType::FullOuter => false, 39 | }; 40 | let allowed_right_pushdown = match join_type { 41 | JoinType::Inner | JoinType::RightOuter => true, 42 | JoinType::Semi | JoinType::Anti | JoinType::LeftOuter | JoinType::FullOuter => { 43 | false 44 | } 45 | }; 46 | 47 | for condition in common_conditions.iter() { 48 | if known_predicates.contains(condition) { 49 | // Skip those already known to be enforced either 50 | // by the join or any descendent node. 
51 |                         continue;
52 |                     }
53 |                     let dependencies = collect_input_dependencies(condition);
54 |                     if !dependencies.is_empty() {
55 |                         if allowed_left_pushdown
56 |                             && dependencies.iter().all(|x| *x < left_num_columns)
57 |                         {
58 |                             left_predicates.push(condition.clone());
59 |                         } else if allowed_right_pushdown
60 |                             && dependencies.iter().all(|x| *x >= left_num_columns)
61 |                         {
62 |                             right_predicates
63 |                                 .push(shift_left_input_refs(condition, left_num_columns));
64 |                         }
65 |                     }
66 |                 }
67 | 
68 |                 if !left_predicates.is_empty() || !right_predicates.is_empty() {
69 |                     let conditions = conditions.clone();
70 |                     let left = *left;
71 |                     let right = *right;
72 |                     let join_type = *join_type;
73 |                     let left = query_graph.filter(left, left_predicates);
74 |                     let right = query_graph.filter(right, right_predicates);
75 | 
76 |                     return Some(query_graph.join(join_type, left, right, conditions));
77 |                 }
78 |             }
79 |         }
80 |         None
81 |     }
82 | }
83 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/apply_pruning.rs:
--------------------------------------------------------------------------------
 1 | use itertools::Itertools;
 2 | 
 3 | use crate::{
 4 |     query_graph::{
 5 |         optimizer::{
 6 |             utils::{
 7 |                 apply_map_to_parents_and_replace_input, required_columns_from_parents,
 8 |                 required_columns_to_column_map,
 9 |             },
10 |             OptRuleType, Rule,
11 |         },
12 |         properties::num_columns,
13 |         NodeId, QueryGraph, QueryNode,
14 |     },
15 |     scalar_expr::{visitor::store_input_dependencies, ScalarExpr},
16 | };
17 | 
18 | /// Rule that given a shared apply where all its parents are pruning projections, computes
19 | /// the superset of columns required by all its parents, and prunes the columns not used
20 | /// by any of them, replacing the parents of the apply with projections over the pruned
21 | /// apply.
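///
/// A hypothetical before/after sketch (plan notation only, not real syntax):
///
/// ```ignore
/// // Before: the parent projections and the correlated parameters only use
/// // columns {0, 5} of an apply whose left input has 4 columns.
/// //   apply = Apply { left, right, correlation, apply_type }
/// //
/// // After applying ApplyPruningRule:
/// //   apply' = Apply {
/// //       left: project(left, [ref_0]),
/// //       right: project(right, [ref_1]),   // old column 5
/// //       correlation,
/// //       apply_type,
/// //   }
/// //   ...and every parent projection is remapped from columns {0, 5} to {0, 1}.
/// ```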
22 | pub struct ApplyPruningRule {} 23 | 24 | impl Rule for ApplyPruningRule { 25 | fn rule_type(&self) -> OptRuleType { 26 | OptRuleType::TopDown 27 | } 28 | 29 | fn apply( 30 | &self, 31 | query_graph: &mut QueryGraph, 32 | node_id: NodeId, 33 | ) -> Option> { 34 | if let QueryNode::Apply { 35 | apply_type, 36 | left, 37 | right, 38 | correlation, 39 | } = query_graph.node(node_id) 40 | { 41 | if let Some(mut required_columns) = required_columns_from_parents(query_graph, node_id) 42 | { 43 | // Add the columns from the LHS referenced by the RHS 44 | for parameter in correlation.parameters.iter() { 45 | store_input_dependencies(parameter, &mut required_columns); 46 | } 47 | if required_columns.len() == num_columns(&query_graph, node_id) { 48 | // All columns are referenced, nothing to prune 49 | return None; 50 | } 51 | let column_map = required_columns_to_column_map(&required_columns); 52 | let left_num_columns = num_columns(query_graph, *left); 53 | let (left_columns, right_columns): (Vec, Vec) = required_columns 54 | .iter() 55 | .sorted() 56 | .partition(|col| **col < left_num_columns); 57 | let left_outputs = left_columns 58 | .iter() 59 | .map(|i| ScalarExpr::InputRef { index: *i }.into()) 60 | .collect::>(); 61 | let right_outputs = right_columns 62 | .iter() 63 | .map(|i| { 64 | ScalarExpr::InputRef { 65 | index: *i - left_num_columns, 66 | } 67 | .into() 68 | }) 69 | .collect::>(); 70 | let correlation = correlation.clone(); 71 | let apply_type = *apply_type; 72 | let left = *left; 73 | let right = *right; 74 | let new_left = query_graph.project(left, left_outputs); 75 | let new_right = query_graph.project(right, right_outputs); 76 | let new_apply = query_graph.add_node(QueryNode::Apply { 77 | correlation, 78 | left: new_left, 79 | right: new_right, 80 | apply_type, 81 | }); 82 | 83 | // Rewrite the parent projections 84 | return Some(apply_map_to_parents_and_replace_input( 85 | query_graph, 86 | node_id, 87 | &column_map, 88 | new_apply, 89 | )); 90 | } 91 | } 92 | None 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/aggregate_project_transpose.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeSet, HashMap}; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{utils::sort_projection, OptRuleType, SingleReplacementRule}, 8 | NodeId, QueryGraph, QueryNode, 9 | }, 10 | scalar_expr::{AggregateExpr, ScalarExpr}, 11 | }; 12 | 13 | /// Given an aggregate node over a non-sorted projection, it creates a new aggregate node 14 | /// over a sorted version of the projection, and adds a re-ordering projection on top of 15 | /// the new aggregate node. 
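///
/// A hypothetical before/after sketch (plan notation only, not real syntax):
///
/// ```ignore
/// // Before: the aggregate sits on top of a non-sorted projection.
/// //   Aggregate { group_key: [0], aggregates: [] } over project(input, [ref_2, ref_1])
/// //
/// // After applying AggregateProjectTransposeRule: the projection is sorted,
/// // the group key is remapped, and a projection on top restores the
/// // original column order.
/// //   project(
/// //       Aggregate { group_key: [1], aggregates: [] } over project(input, [ref_1, ref_2]),
/// //       [ref_0],
/// //   )
/// ```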
16 | pub struct AggregateProjectTransposeRule {} 17 | 18 | impl SingleReplacementRule for AggregateProjectTransposeRule { 19 | fn rule_type(&self) -> OptRuleType { 20 | OptRuleType::BottomUp 21 | } 22 | 23 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 24 | if let QueryNode::Aggregate { 25 | group_key, 26 | aggregates, 27 | input, 28 | } = query_graph.node(node_id) 29 | { 30 | if let QueryNode::Project { 31 | outputs, 32 | input: proj_input, 33 | } = query_graph.node(*input) 34 | { 35 | if let Some((reorder_map, sorted_proj)) = sort_projection(outputs) { 36 | let column_map = reorder_map 37 | .iter() 38 | .enumerate() 39 | .map(|(i, e)| (*e, i)) 40 | .collect::>(); 41 | let new_group_key = group_key 42 | .iter() 43 | .map(|k| *column_map.get(k).unwrap()) 44 | .collect::>(); 45 | let new_aggregates = aggregates 46 | .iter() 47 | .map(|k| { 48 | AggregateExpr { 49 | op: k.op.clone(), 50 | operands: k 51 | .operands 52 | .iter() 53 | .map(|e| *column_map.get(e).unwrap()) 54 | .collect_vec(), 55 | } 56 | .into() 57 | }) 58 | .collect_vec(); 59 | 60 | // Reorder the grouping key elements in a projection over the new 61 | // aggregate node 62 | let group_key_len = group_key.len(); 63 | let aggregates_len = aggregates.len(); 64 | let reordering_proj = group_key 65 | .iter() 66 | .enumerate() 67 | .map(|(i, k)| (i, *column_map.get(k).unwrap())) 68 | .sorted_by_key(|(_, e)| *e) 69 | .enumerate() 70 | .sorted_by_key(|(_, (i, _))| *i) 71 | .map(|(i, _)| i) 72 | // ... and the aggregates 73 | .chain(group_key_len..group_key_len + aggregates_len) 74 | .map(|i| ScalarExpr::input_ref(i).into()) 75 | .collect_vec(); 76 | 77 | let new_project = query_graph.project(*proj_input, sorted_proj); 78 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 79 | group_key: new_group_key, 80 | aggregates: new_aggregates, 81 | input: new_project, 82 | }); 83 | return Some(query_graph.project(new_aggregate, reordering_proj)); 84 | } 85 | } 86 | } 87 | None 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /tools/cytoscape.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 27 | 28 | 29 | 30 | 31 |
32 | 33 | 34 | 35 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /src/value.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::data_type::DataType; 6 | 7 | #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash, Debug)] 8 | pub enum Value { 9 | Bool(bool), 10 | Int(i32), 11 | BigInt(i64), 12 | String(String), 13 | List(Vec>), 14 | Any(Box), 15 | Null, 16 | } 17 | 18 | #[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] 19 | pub struct Literal { 20 | pub value: Value, 21 | pub data_type: DataType, 22 | } 23 | 24 | impl fmt::Display for Literal { 25 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 26 | Literal::fmt(f, &self.value, &self.data_type) 27 | } 28 | } 29 | 30 | impl Literal { 31 | pub fn new(value: Value, data_type: DataType) -> Self { 32 | Self { value, data_type } 33 | } 34 | 35 | pub fn build_default(data_type: DataType) -> Self { 36 | Self { 37 | value: default_value_for_data_type(&data_type), 38 | data_type, 39 | } 40 | } 41 | 42 | pub fn build_null(data_type: DataType) -> Self { 43 | Self { 44 | value: Value::Null, 45 | data_type, 46 | } 47 | } 48 | 49 | fn fmt(f: &mut fmt::Formatter, value: &Value, data_type: &DataType) -> fmt::Result { 50 | match (value, data_type) { 51 | (Value::Bool(value), DataType::Bool) => { 52 | if *value { 53 | write!(f, "TRUE") 54 | } else { 55 | write!(f, "FALSE") 56 | } 57 | } 58 | (Value::Int(value), DataType::Int) => write!(f, "{}", value), 59 | (Value::BigInt(value), DataType::BigInt) => write!(f, "{}", value), 60 | // TODO(asenac) escape strings 61 | (Value::String(value), DataType::String) => write!(f, "'{}'", value), 62 | (Value::List(vec), DataType::Array(elem_type)) => { 63 | write!(f, "[")?; 64 | for (i, e) in vec.iter().enumerate() { 65 | if i > 0 { 66 | write!(f, ", ")?; 67 | } 68 | Self::fmt(f, &e, &elem_type)?; 69 | } 70 | write!(f, "]") 71 | } 72 | (Value::List(vec), DataType::Tuple(data_types)) => { 73 | write!(f, "(")?; 74 | for (i, (e, data_type)) in vec.iter().zip(data_types.iter()).enumerate() { 75 | if i > 0 { 76 | write!(f, ", ")?; 77 | } 78 | Self::fmt(f, e, data_type)?; 79 | } 80 | write!(f, ")") 81 | } 82 | (Value::Null, _) => write!(f, "NULL"), 83 | (Value::Any(literal), DataType::Any) => write!(f, "{}", literal), 84 | (_, _) => panic!("unsupported value - data type pair"), 85 | } 86 | } 87 | 88 | pub fn is_null(&self) -> bool { 89 | if let Value::Null = self.value { 90 | true 91 | } else { 92 | false 93 | } 94 | } 95 | } 96 | 97 | pub fn default_value_for_data_type(data_type: &DataType) -> Value { 98 | match data_type { 99 | DataType::Bool => Value::Bool(false), 100 | DataType::Int => Value::Int(0), 101 | DataType::BigInt => Value::BigInt(0), 102 | DataType::String => Value::String("".to_string()), 103 | DataType::Array(_) => Value::List(Vec::new()), 104 | DataType::Tuple(members) => Value::List( 105 | members 106 | .iter() 107 | .map(|nested_type| Box::new(default_value_for_data_type(nested_type))) 108 | .collect_vec(), 109 | ), 110 | DataType::Any => Value::Null, 111 | DataType::Unknown => panic!("cannot create value of unknown type"), 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /tools/d3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 116 | 117 | 118 | 
-------------------------------------------------------------------------------- /src/query_graph/properties/subqueries.rs: -------------------------------------------------------------------------------- 1 | use std::{any::TypeId, collections::BTreeSet, rc::Rc}; 2 | 3 | use crate::{ 4 | query_graph::{visitor::QueryGraphPrePostVisitor, NodeId, QueryGraph}, 5 | visitor_utils::PreOrderVisitationResult, 6 | }; 7 | 8 | struct SubqueryPropertyTag; 9 | 10 | /// Returns a set with the subqueries the node contains, if any. 11 | pub fn subqueries(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 12 | let type_id = TypeId::of::(); 13 | if let Some(cached) = query_graph 14 | .property_cache 15 | .borrow_mut() 16 | .single_node_properties(node_id) 17 | .get(&type_id) 18 | { 19 | return cached 20 | .downcast_ref::>>() 21 | .unwrap() 22 | .clone(); 23 | } 24 | let subqueries = Rc::new(query_graph.node(node_id).collect_subqueries()); 25 | query_graph 26 | .property_cache 27 | .borrow_mut() 28 | .single_node_properties(node_id) 29 | .insert(type_id, Box::new(subqueries.clone())); 30 | subqueries 31 | } 32 | 33 | /// Retrieve the subqueries within the given subgraph, but not the nested subqueries. 34 | pub fn subgraph_subqueries(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 35 | SubgraphSubqueries::subgraph_subqueries(query_graph, node_id) 36 | } 37 | 38 | struct SubgraphSubqueries {} 39 | 40 | impl SubgraphSubqueries { 41 | fn subgraph_subqueries(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 42 | let mut visitor = SubgraphSubqueries {}; 43 | query_graph.visit_subgraph(&mut visitor, node_id); 44 | visitor.subgraph_subqueries_unchecked(query_graph, node_id) 45 | } 46 | 47 | fn subgraph_subqueries_unchecked( 48 | &self, 49 | query_graph: &QueryGraph, 50 | node_id: NodeId, 51 | ) -> Rc> { 52 | query_graph 53 | .property_cache 54 | .borrow_mut() 55 | .node_bottom_up_properties(node_id) 56 | .get(&Self::metadata_type_id()) 57 | .unwrap() 58 | .downcast_ref::>>() 59 | .unwrap() 60 | .clone() 61 | } 62 | 63 | fn metadata_type_id() -> TypeId { 64 | TypeId::of::() 65 | } 66 | 67 | fn compute_property_for_node( 68 | &self, 69 | query_graph: &QueryGraph, 70 | node_id: NodeId, 71 | ) -> Rc> { 72 | let mut subqueries: BTreeSet = subqueries(query_graph, node_id).as_ref().clone(); 73 | let query_node = query_graph.node(node_id); 74 | for input in 0..query_node.num_inputs() { 75 | let input_subqueries = 76 | self.subgraph_subqueries_unchecked(query_graph, query_node.get_input(input)); 77 | subqueries.extend(input_subqueries.iter()); 78 | } 79 | Rc::new(subqueries) 80 | } 81 | } 82 | 83 | impl QueryGraphPrePostVisitor for SubgraphSubqueries { 84 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 85 | if query_graph 86 | .property_cache 87 | .borrow_mut() 88 | .node_bottom_up_properties(node_id) 89 | .contains_key(&Self::metadata_type_id()) 90 | { 91 | PreOrderVisitationResult::DoNotVisitInputs 92 | } else { 93 | PreOrderVisitationResult::VisitInputs 94 | } 95 | } 96 | 97 | fn visit_post(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 98 | if !query_graph 99 | .property_cache 100 | .borrow_mut() 101 | .node_bottom_up_properties(node_id) 102 | .contains_key(&Self::metadata_type_id()) 103 | { 104 | let correlated_input_refs = self.compute_property_for_node(query_graph, node_id); 105 | query_graph 106 | .property_cache 107 | .borrow_mut() 108 | .node_bottom_up_properties(node_id) 109 | .insert(Self::metadata_type_id(), Box::new(correlated_input_refs)); 110 | 
} 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/scalar_expr/reduction.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for reducing scalar expression 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | data_type::DataType, 7 | query_graph::{properties::num_columns, QueryGraph}, 8 | }; 9 | 10 | use super::{rewrite::rewrite_expr_pre_post, NaryOp, ScalarExpr, ScalarExprRef, Subquery}; 11 | 12 | /// Reduce the given expression recursively. Keeps trying until the expression cannot 13 | /// be reduced any further. 14 | pub fn reduce_expr_recursively( 15 | expr: &ScalarExprRef, 16 | query_graph: &QueryGraph, 17 | row_type: &[DataType], 18 | ) -> ScalarExprRef { 19 | rewrite_expr_pre_post( 20 | &mut |curr_expr: &ScalarExprRef| reduce_expr(curr_expr, query_graph, row_type), 21 | &expr, 22 | ) 23 | } 24 | 25 | pub fn reduce_expr( 26 | expr: &ScalarExprRef, 27 | query_graph: &QueryGraph, 28 | row_type: &[DataType], 29 | ) -> Option { 30 | if let ScalarExpr::NaryOp { 31 | op: NaryOp::And, 32 | operands, 33 | } = expr.as_ref() 34 | { 35 | if operands.iter().any(|o| **o == ScalarExpr::false_literal()) { 36 | return Some(ScalarExpr::false_literal().into()); 37 | } 38 | if operands.iter().any(|o| **o == ScalarExpr::true_literal()) { 39 | let new_operands = operands 40 | .iter() 41 | .filter(|o| *o.as_ref() == ScalarExpr::true_literal()) 42 | .dedup() 43 | .cloned() 44 | .collect_vec(); 45 | return Some(match new_operands.len() { 46 | 0 => ScalarExpr::true_literal().into(), 47 | 1 => new_operands[0].clone(), 48 | _ => ScalarExpr::nary(NaryOp::And, new_operands).into(), 49 | }); 50 | } 51 | } 52 | if let ScalarExpr::NaryOp { 53 | op: NaryOp::Or, 54 | operands, 55 | } = expr.as_ref() 56 | { 57 | if operands.iter().any(|o| **o == ScalarExpr::true_literal()) { 58 | return Some(ScalarExpr::true_literal().into()); 59 | } 60 | if operands.iter().any(|o| **o == ScalarExpr::false_literal()) { 61 | let new_operands = operands 62 | .iter() 63 | .filter(|o| *o.as_ref() == ScalarExpr::false_literal()) 64 | .dedup() 65 | .cloned() 66 | .collect_vec(); 67 | return Some(match new_operands.len() { 68 | 0 => ScalarExpr::false_literal().into(), 69 | 1 => new_operands[0].clone(), 70 | _ => ScalarExpr::nary(NaryOp::And, new_operands).into(), 71 | }); 72 | } 73 | } 74 | if let ScalarExpr::BinaryOp { op, left, right } = expr.as_ref() { 75 | if op.propagates_null() && (left.is_null() || right.is_null()) { 76 | return Some(ScalarExpr::null_literal(expr.data_type(query_graph, row_type)).into()); 77 | } 78 | } 79 | None 80 | } 81 | 82 | pub fn reduce_and_prune_exists_subplans_recursively( 83 | expr: &ScalarExprRef, 84 | query_graph: &mut QueryGraph, 85 | row_type: &[DataType], 86 | ) -> ScalarExprRef { 87 | rewrite_expr_pre_post( 88 | &mut |curr_expr: &ScalarExprRef| { 89 | prune_exists_subplan(curr_expr, query_graph) 90 | .or_else(|| reduce_expr(curr_expr, query_graph, row_type)) 91 | }, 92 | &expr, 93 | ) 94 | } 95 | 96 | pub fn prune_exists_subplan( 97 | expr: &ScalarExprRef, 98 | query_graph: &mut QueryGraph, 99 | ) -> Option { 100 | if let ScalarExpr::ExistsSubquery { subquery } = expr.as_ref() { 101 | if num_columns(query_graph, subquery.root) > 0 { 102 | // Skip the root node 103 | let subquery_plan = query_graph.node(subquery.root).get_input(0); 104 | let correlation = subquery.correlation.clone(); 105 | let project = query_graph.project(subquery_plan, vec![]); 106 | let new_subquery_root = 
query_graph.add_subquery(project); 107 | return Some( 108 | ScalarExpr::ExistsSubquery { 109 | subquery: Subquery { 110 | root: new_subquery_root, 111 | correlation, 112 | }, 113 | } 114 | .into(), 115 | ); 116 | } 117 | } 118 | 119 | None 120 | } 121 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/aggregate_simplifier.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeSet; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{OptRuleType, SingleReplacementRule}, 6 | properties::{equivalence_classes, num_columns}, 7 | NodeId, QueryGraph, QueryNode, 8 | }, 9 | scalar_expr::{ 10 | equivalence_class::{find_class, EquivalenceClasses}, 11 | rewrite::{rewrite_expr_post, rewrite_expr_vec}, 12 | ScalarExpr, ScalarExprRef, 13 | }, 14 | }; 15 | 16 | /// Optimization rule that removes grouping key elements from an Aggregate node that 17 | /// are either constants or that can be computed from the remaining ones. 18 | /// 19 | /// Note that the last constant element cannot be removed if it's the only grouping 20 | /// key element, as that would make the aggregate always return a row. 21 | pub struct AggregateSimplifierRule {} 22 | 23 | impl SingleReplacementRule for AggregateSimplifierRule { 24 | fn rule_type(&self) -> OptRuleType { 25 | OptRuleType::Always 26 | } 27 | 28 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 29 | if let QueryNode::Aggregate { 30 | group_key, 31 | aggregates, 32 | input, 33 | } = query_graph.node(node_id) 34 | { 35 | if group_key.len() < 2 { 36 | return None; 37 | } 38 | let classes = equivalence_classes(query_graph, node_id); 39 | if classes.is_empty() { 40 | return None; 41 | } 42 | if let Some((out_col, in_col, expr)) = find_redundant_key(group_key, &classes) { 43 | let num_columns = num_columns(query_graph, node_id); 44 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 45 | group_key: group_key 46 | .iter() 47 | .filter(|i| **i != in_col) 48 | .cloned() 49 | .collect(), 50 | aggregates: aggregates.clone(), 51 | input: *input, 52 | }); 53 | let project = (0..num_columns) 54 | .map(|i| { 55 | if i != out_col { 56 | ScalarExpr::input_ref(i).into() 57 | } else { 58 | expr.clone() 59 | } 60 | }) 61 | .collect::>(); 62 | let project = update_project_after_pruning_column(project, out_col); 63 | return Some(query_graph.project(new_aggregate, project)); 64 | } 65 | } 66 | None 67 | } 68 | } 69 | 70 | /// Finds an element of the grouping key that can be written in terms of the rest or 71 | /// it's a constant. 72 | /// 73 | /// Returns a triple with the position in which that element is projected by the Aggregate 74 | /// operator, the input column and the expression it is equivalent to. 
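For example (a hypothetical sketch, not part of aggregate_simplifier.rs): if find_redundant_key reports that output column 1 is equivalent to a constant, the rule re-emits that constant at position 1 and shifts every later reference down by one, since the new aggregate exposes one column less.

fn main() {
    // Aggregate with four output columns where column 1 is redundant (made-up example).
    let num_columns = 4usize;
    let out_col = 1usize;
    let project: Vec<String> = (0..num_columns)
        .map(|i| {
            if i == out_col {
                "'x'".to_string() // the expression the pruned key element is equivalent to
            } else if i > out_col {
                format!("ref_{}", i - 1) // shifted after pruning the column
            } else {
                format!("ref_{}", i)
            }
        })
        .collect();
    assert_eq!(project, ["ref_0", "'x'", "ref_1", "ref_2"]);
}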
75 | fn find_redundant_key( 76 | group_key: &BTreeSet, 77 | classes: &EquivalenceClasses, 78 | ) -> Option<(usize, usize, ScalarExprRef)> { 79 | group_key.iter().enumerate().find_map(|(out_col, in_col)| { 80 | let input_ref = ScalarExpr::input_ref(out_col).into(); 81 | if let Some(class_id) = find_class(&classes, &input_ref) { 82 | let class = &classes[class_id]; 83 | // TODO(asenac) verify that other doesn't reference input_ref 84 | if let Some(other) = class.members.iter().find(|x| **x != input_ref).cloned() { 85 | return Some((out_col, *in_col, other)); 86 | } 87 | } 88 | None 89 | }) 90 | } 91 | 92 | /// Rewrites the expressions in `project` so that all input refs after the pruned column 93 | /// are shifted one position. 94 | fn update_project_after_pruning_column( 95 | project: Vec, 96 | pruned_col: usize, 97 | ) -> Vec { 98 | rewrite_expr_vec(&project, &mut |expr| { 99 | rewrite_expr_post( 100 | &mut |e: &ScalarExprRef| { 101 | if let ScalarExpr::InputRef { index } = e.as_ref() { 102 | if *index > pruned_col { 103 | return Some(ScalarExpr::input_ref(index - 1).into()); 104 | } 105 | } 106 | None 107 | }, 108 | expr, 109 | ) 110 | }) 111 | } 112 | -------------------------------------------------------------------------------- /src/scalar_expr/equivalence_class.rs: -------------------------------------------------------------------------------- 1 | use crate::scalar_expr::*; 2 | use std::collections::{BTreeSet, HashMap}; 3 | 4 | /// An equivalence class is a group of expressions within a given context that 5 | /// are known to always lead to the same values. 6 | /// 7 | /// If `ref_0 = ref_1` is known to be true, then `ref_0` and `ref_1` belong to 8 | /// the same equivalence class. 9 | pub struct EquivalenceClass { 10 | /// Indicates that any of the equality predicates that lead to this class 11 | /// was using the null-rejecting equality operator, ie. the SQL equality 12 | /// operator (`BinaryOp::Eq`), and hence, none of the expression within the 13 | /// class will evaluate to NULL. 14 | pub null_rejecting: bool, 15 | /// The list of expressions belonging to the class. 16 | pub members: BTreeSet, 17 | } 18 | 19 | pub type EquivalenceClasses = Vec; 20 | 21 | impl EquivalenceClass { 22 | fn new(null_rejecting: bool, members: BTreeSet) -> Self { 23 | Self { 24 | null_rejecting, 25 | members, 26 | } 27 | } 28 | 29 | /// Merges two equivalence classes. 30 | fn merge(&mut self, mut other: Self) { 31 | self.null_rejecting = self.null_rejecting || other.null_rejecting; 32 | self.members.append(&mut other.members); 33 | } 34 | } 35 | 36 | /// Returns the index of the class within the given list of classes, if any, 37 | /// the given expression belongs to. 38 | pub fn find_class(classes: &EquivalenceClasses, expr: &ScalarExprRef) -> Option { 39 | classes.iter().enumerate().find_map(|(class_id, class)| { 40 | if class.members.contains(expr) { 41 | Some(class_id) 42 | } else { 43 | None 44 | } 45 | }) 46 | } 47 | 48 | /// Extract the equivalence classes using the equality predicates among the given 49 | /// list of predicates. 50 | /// 51 | /// The same expression cannot belong to the two different classes. If `ref_0 = ref_1` 52 | /// and `ref_1 = ref_2` are present in the given list of predicates, then `ref_0`, `ref_1` 53 | /// and `ref_2` are part of the same equivalence class. 
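The transitive merging described above can be sketched with plain strings standing in for expressions (a simplified illustration only, not the function below; it ignores the null-rejecting flag):

use std::collections::BTreeSet;

fn main() {
    // Hypothetical equality predicates: a = b, b = c, d = e.
    let equalities = [("a", "b"), ("b", "c"), ("d", "e")];
    let mut classes: Vec<BTreeSet<&str>> = Vec::new();
    for (l, r) in equalities {
        let lc = classes.iter().position(|c| c.contains(l));
        let rc = classes.iter().position(|c| c.contains(r));
        match (lc, rc) {
            (None, None) => classes.push([l, r].into_iter().collect()),
            (Some(i), None) | (None, Some(i)) => {
                classes[i].insert(l);
                classes[i].insert(r);
            }
            (Some(i), Some(j)) if i != j => {
                // Merge the two classes, keeping the one with the smaller index.
                let removed = classes.remove(i.max(j));
                classes[i.min(j)].extend(removed);
            }
            _ => {}
        }
    }
    // a, b and c end up in one class; d and e in another.
    assert_eq!(classes.len(), 2);
    assert!(classes[0].contains("c") && classes[1].contains("e"));
}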
54 | pub fn extract_equivalence_classes(predicates: &Vec) -> EquivalenceClasses { 55 | let mut classes: EquivalenceClasses = Vec::new(); 56 | for predicate in predicates.iter() { 57 | if let ScalarExpr::BinaryOp { op, left, right } = predicate.as_ref() { 58 | let null_rejecting = match op { 59 | BinaryOp::RawEq => false, 60 | BinaryOp::Eq => true, 61 | _ => continue, 62 | }; 63 | let left_class = find_class(&classes, left); 64 | let right_class = find_class(&classes, right); 65 | match (left_class, right_class) { 66 | (None, None) => { 67 | classes.push(EquivalenceClass::new( 68 | null_rejecting, 69 | BTreeSet::from([left.clone(), right.clone()]), 70 | )); 71 | } 72 | (None, Some(class_id)) | (Some(class_id), None) => { 73 | let new_class = 74 | EquivalenceClass::new(true, BTreeSet::from([left.clone(), right.clone()])); 75 | classes[class_id].merge(new_class); 76 | } 77 | (Some(class_left), Some(class_right)) => { 78 | if class_left != class_right { 79 | let min_class = std::cmp::min(class_left, class_right); 80 | let max_class = std::cmp::max(class_left, class_right); 81 | let removed_class = classes.remove(max_class); 82 | classes[min_class].merge(removed_class); 83 | } 84 | } 85 | } 86 | } 87 | } 88 | classes 89 | } 90 | 91 | /// Converts a set of classes into a replacement map in order to replace each member of a 92 | /// class with the first element of the class, ie. with the representative of the class. 93 | pub fn to_replacement_map(classes: &EquivalenceClasses) -> HashMap { 94 | classes 95 | .iter() 96 | .map(|class| { 97 | let first = class.members.first().unwrap(); 98 | class 99 | .members 100 | .iter() 101 | .skip(1) 102 | .filter(|other| !other.is_literal()) 103 | .map(|other| (other.clone(), first.clone())) 104 | }) 105 | .flatten() 106 | .collect::>() 107 | } 108 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/join_pruning.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{ 8 | utils::{ 9 | apply_map_to_parents_and_replace_input, required_columns_from_parents, 10 | required_columns_to_column_map, 11 | }, 12 | OptRuleType, Rule, 13 | }, 14 | properties::num_columns, 15 | NodeId, QueryGraph, QueryNode, 16 | }, 17 | scalar_expr::{ 18 | rewrite::{apply_column_map, rewrite_expr_vec}, 19 | visitor::store_input_dependencies, 20 | ScalarExpr, 21 | }, 22 | }; 23 | 24 | /// Rule that given a shared join where all its parents are pruning projections, computes 25 | /// the superset of columns required by all its parents, and prunes the columns not used 26 | /// by any of them, replacing the parents of the join with projections over the pruned 27 | /// join. 
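The per-side pruning described above splits the required columns around the boundary between the two join inputs and rebases the right-hand indexes to the right input's own numbering. A tiny sketch (hypothetical column numbers, plain std only; not the rule's code):

fn main() {
    // Join whose left input exposes 3 columns; required columns are {1, 2, 4}.
    let left_num_columns = 3usize;
    let required = [1usize, 2, 4];
    let (left_cols, right_cols): (Vec<usize>, Vec<usize>) =
        required.iter().partition(|col| **col < left_num_columns);
    // Right-side refs are rebased to the right input's own numbering.
    let right_outputs: Vec<usize> = right_cols.iter().map(|c| c - left_num_columns).collect();
    assert_eq!(left_cols, vec![1, 2]);
    assert_eq!(right_outputs, vec![1]);
}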
28 | pub struct JoinPruningRule {} 29 | 30 | impl Rule for JoinPruningRule { 31 | fn rule_type(&self) -> OptRuleType { 32 | OptRuleType::TopDown 33 | } 34 | 35 | fn apply( 36 | &self, 37 | query_graph: &mut QueryGraph, 38 | node_id: NodeId, 39 | ) -> Option> { 40 | if let QueryNode::Join { 41 | join_type, 42 | left, 43 | right, 44 | conditions, 45 | } = query_graph.node(node_id) 46 | { 47 | if let Some(required_columns) = required_columns_from_parents(query_graph, node_id) { 48 | // Rewrite conditions 49 | let column_map = required_columns_to_column_map(&required_columns); 50 | let mut required_columns_including_join = column_map 51 | .iter() 52 | .map(|(col, _)| *col) 53 | .collect::>(); 54 | for condition in conditions.iter() { 55 | store_input_dependencies(condition, &mut required_columns_including_join); 56 | } 57 | if required_columns_including_join.len() == num_columns(&query_graph, node_id) { 58 | return None; 59 | } 60 | let join_column_map = 61 | required_columns_to_column_map(&required_columns_including_join); 62 | let new_conditions = rewrite_expr_vec(conditions, &mut |e| { 63 | apply_column_map(e, &join_column_map).unwrap() 64 | }); 65 | 66 | // Prune the branches 67 | let left_num_columns = num_columns(query_graph, *left); 68 | let (left_columns, right_columns): (Vec, Vec) = 69 | required_columns_including_join 70 | .iter() 71 | .sorted() 72 | .partition(|col| **col < left_num_columns); 73 | let left_outputs = left_columns 74 | .iter() 75 | .map(|i| ScalarExpr::InputRef { index: *i }.into()) 76 | .collect::>(); 77 | let right_outputs = right_columns 78 | .iter() 79 | .map(|i| { 80 | ScalarExpr::InputRef { 81 | index: *i - left_num_columns, 82 | } 83 | .into() 84 | }) 85 | .collect::>(); 86 | let left = *left; 87 | let right = *right; 88 | let join_type = *join_type; 89 | let new_left = query_graph.project(left, left_outputs); 90 | let new_right = query_graph.project(right, right_outputs); 91 | let new_join = query_graph.join(join_type, new_left, new_right, new_conditions); 92 | 93 | // Prune the columns used by the join conditions but not by the parents 94 | let pruning_proj_outputs = required_columns_including_join 95 | .iter() 96 | .sorted() 97 | .enumerate() 98 | .filter(|(_, orig_col)| required_columns.contains(&orig_col)) 99 | .map(|(i, _)| ScalarExpr::input_ref(i).into()) 100 | .collect(); 101 | let pruning_proj = query_graph.project(new_join, pruning_proj_outputs); 102 | 103 | // Rewrite the parent projections 104 | return Some(apply_map_to_parents_and_replace_input( 105 | query_graph, 106 | node_id, 107 | &column_map, 108 | pruning_proj, 109 | )); 110 | } 111 | } 112 | None 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/join_project_transpose.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{utils::sort_projection, OptRuleType, SingleReplacementRule}, 8 | properties::num_columns, 9 | NodeId, QueryGraph, QueryNode, 10 | }, 11 | scalar_expr::{rewrite::apply_column_map, ScalarExpr}, 12 | }; 13 | 14 | /// Given a non-sorted projection at the input of a join, it creates a new join with 15 | /// a sorted projection and adds a reordering projection on top of the new join to 16 | /// leave the columns in the same order as before. 
17 | /// 18 | /// This is a normalization rule for lifting column reordering towards the root of 19 | /// the query graph. 20 | pub struct JoinProjectTransposeRule {} 21 | 22 | impl SingleReplacementRule for JoinProjectTransposeRule { 23 | fn rule_type(&self) -> OptRuleType { 24 | OptRuleType::BottomUp 25 | } 26 | 27 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 28 | if let QueryNode::Join { 29 | join_type, 30 | left, 31 | right, 32 | conditions, 33 | } = query_graph.node(node_id) 34 | { 35 | // Lifting projection from the LHS 36 | if let QueryNode::Project { 37 | outputs, 38 | input: proj_input, 39 | } = query_graph.node(*left) 40 | { 41 | if let Some((reorder_map, sorted_proj)) = sort_projection(outputs) { 42 | let left_num_columns = num_columns(query_graph, *left); 43 | let right_num_columns = num_columns(query_graph, *right); 44 | let column_map = reorder_map 45 | .iter() 46 | .enumerate() 47 | .map(|(i, e)| (*e, i)) 48 | .chain( 49 | (left_num_columns..left_num_columns + right_num_columns) 50 | .map(|i| (i, i)), 51 | ) 52 | .collect::>(); 53 | let new_conditions = conditions 54 | .iter() 55 | .map(|c| apply_column_map(c, &column_map).unwrap()) 56 | .collect_vec(); 57 | 58 | let final_project = column_map 59 | .iter() 60 | .sorted_by_key(|(_, j)| *j) 61 | .map(|(i, _)| ScalarExpr::input_ref(*i).into()) 62 | .collect_vec(); 63 | 64 | let join_type = *join_type; 65 | let right = *right; 66 | let new_left = query_graph.project(*proj_input, sorted_proj); 67 | let new_join = query_graph.add_node(QueryNode::Join { 68 | join_type, 69 | conditions: new_conditions, 70 | left: new_left, 71 | right, 72 | }); 73 | 74 | return Some(query_graph.project(new_join, final_project)); 75 | } 76 | } 77 | // Lifting projection from the RHS 78 | if let QueryNode::Project { 79 | outputs, 80 | input: proj_input, 81 | } = query_graph.node(*right) 82 | { 83 | if let Some((reorder_map, sorted_proj)) = sort_projection(outputs) { 84 | let left_num_columns = num_columns(query_graph, *left); 85 | let column_map = reorder_map 86 | .iter() 87 | .enumerate() 88 | .map(|(i, e)| (left_num_columns + *e, left_num_columns + i)) 89 | .chain((0..left_num_columns).map(|i| (i, i))) 90 | .collect::>(); 91 | let new_conditions = conditions 92 | .iter() 93 | .map(|c| apply_column_map(c, &column_map).unwrap()) 94 | .collect_vec(); 95 | 96 | let final_project = column_map 97 | .iter() 98 | .sorted_by_key(|(_, j)| *j) 99 | .map(|(i, _)| ScalarExpr::input_ref(*i).into()) 100 | .collect_vec(); 101 | 102 | let join_type = *join_type; 103 | let left = *left; 104 | let new_right = query_graph.project(*proj_input, sorted_proj); 105 | let new_join = query_graph.add_node(QueryNode::Join { 106 | join_type, 107 | conditions: new_conditions, 108 | left, 109 | right: new_right, 110 | }); 111 | 112 | return Some(query_graph.project(new_join, final_project)); 113 | } 114 | } 115 | } 116 | None 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /tests/testdata/explain/project_normalization.test: -------------------------------------------------------------------------------- 1 | run 2 | project_normalization_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 3 7 | - Row Type: string, string, bool 8 | - Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello') 9 | [3] Project [ref_1, ref_2, eq(ref_2, 'hello')] 10 | - Num Columns: 3 11 | - Row Type: string, string, bool 12 | - Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), 
lt(ref_0, 'hello') 13 | [2] Filter [lt(ref_1, 'hello'), eq(ref_2, 'hello')] 14 | - Num Columns: 5 15 | - Row Type: string, string, string, string, string 16 | - Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello') 17 | [1] TableScan id: 1 18 | - Num Columns: 5 19 | - Row Type: string, string, string, string, string 20 | 21 | 22 | Optimized: 23 | [0] QueryRoot 24 | - Num Columns: 3 25 | - Row Type: string, string, bool 26 | - Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1) 27 | [4] Project [ref_1, 'hello', TRUE] 28 | - Num Columns: 3 29 | - Row Type: string, string, bool 30 | - Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1) 31 | [5] Filter [eq(ref_2, 'hello'), lt(ref_1, 'hello')] 32 | - Num Columns: 5 33 | - Row Type: string, string, string, string, string 34 | - Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello') 35 | [1] TableScan id: 1 36 | - Num Columns: 5 37 | - Row Type: string, string, string, string, string 38 | 39 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello')"]},{"id":"3","label":"[3] Project [ref_1, ref_2, eq(ref_2, 'hello')]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello')"]},{"id":"2","label":"[2] Filter [lt(ref_1, 'hello'), eq(ref_2, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 40 | step ProjectNormalizationRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello')"]},{"id":"3","label":"[3] Project [ref_1, ref_2, eq(ref_2, 'hello')]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello')"]},{"id":"2","label":"[2] Filter [lt(ref_1, 'hello'), eq(ref_2, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_1, 'hello', TRUE]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"3","to":"4","label":"ProjectNormalizationRule"}]} 41 | step FilterNormalizationRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]},{"id":"4","label":"[4] Project [ref_1, 'hello', TRUE]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 
'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_1, 'hello'), eq(ref_2, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"5","label":"[5] Filter [eq(ref_2, 'hello'), lt(ref_1, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"2","to":"5","label":"FilterNormalizationRule"}]} 42 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]},{"id":"4","label":"[4] Project [ref_1, 'hello', TRUE]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Filter [eq(ref_2, 'hello'), lt(ref_1, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"}]} 43 | ---- 44 | ---- 45 | -------------------------------------------------------------------------------- /src/query_graph/cloner.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | scalar_expr::ScalarExprRef, 3 | visitor_utils::{PostOrderVisitationResult, PreOrderVisitationResult}, 4 | }; 5 | 6 | use super::{visitor::QueryGraphPrePostVisitorMut, NodeId, QueryGraph, QueryNode}; 7 | 8 | /// Clones a subgraph, except for the pruned subgraphs, applying the given rewrite 9 | /// to the expressions contained by the nodes. 
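A possible usage sketch of deep_clone, based only on the signatures visible in this file and the no-op test at its bottom (the module paths are assumed to be publicly reachable and the plan shape is made up): cloning a projection while shifting every top-level input ref by one is expected to produce a node different from the original, unlike the no-op case.

use rust_sql::query_graph::{cloner::deep_clone, QueryGraph};
use rust_sql::scalar_expr::ScalarExpr;

fn main() {
    let mut query_graph = QueryGraph::new();
    let table_scan_id = query_graph.table_scan(0, 10);
    let project_id = query_graph.project(
        table_scan_id,
        (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(),
    );
    // Clone the whole subgraph (no pruning), shifting each projected input ref by one.
    let cloned_id = deep_clone(&mut query_graph, project_id, &|_, _| false, &mut |e| {
        if let ScalarExpr::InputRef { index } = e.as_ref() {
            ScalarExpr::input_ref(index + 1).into()
        } else {
            e.clone()
        }
    });
    // The rewritten projection should be a different node from the original one.
    assert_ne!(cloned_id, project_id);
}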
10 | pub fn deep_clone( 11 | query_graph: &mut QueryGraph, 12 | subgraph: NodeId, 13 | prune: &P, 14 | rewrite: &mut R, 15 | ) -> NodeId 16 | where 17 | P: Fn(&QueryGraph, NodeId) -> bool, 18 | R: FnMut(&ScalarExprRef) -> ScalarExprRef, 19 | { 20 | let mut cloner = DeepCloner::new(prune, rewrite); 21 | query_graph.visit_subgraph_mut(&mut cloner, subgraph); 22 | cloner.stack.first().cloned().unwrap() 23 | } 24 | 25 | struct DeepCloner<'a, P, R> 26 | where 27 | P: Fn(&QueryGraph, NodeId) -> bool, 28 | R: FnMut(&ScalarExprRef) -> ScalarExprRef, 29 | { 30 | stack: Vec, 31 | prune: &'a P, 32 | rewrite: &'a mut R, 33 | skip_post: bool, 34 | } 35 | 36 | impl<'a, P, R> DeepCloner<'a, P, R> 37 | where 38 | P: Fn(&QueryGraph, NodeId) -> bool, 39 | R: FnMut(&ScalarExprRef) -> ScalarExprRef, 40 | { 41 | fn new(prune: &'a P, rewrite: &'a mut R) -> Self { 42 | Self { 43 | stack: Vec::new(), 44 | prune, 45 | rewrite, 46 | skip_post: false, 47 | } 48 | } 49 | 50 | fn clone_with_new_inputs(&mut self, query_graph: &mut QueryGraph, node_id: NodeId) -> NodeId { 51 | let mut cloned_node = query_graph.node(node_id).clone(); 52 | let num_inputs = cloned_node.num_inputs(); 53 | let inputs = &self.stack[self.stack.len() - num_inputs..]; 54 | match &mut cloned_node { 55 | QueryNode::Project { outputs, input } => { 56 | outputs.iter_mut().for_each(|e| *e = (self.rewrite)(&e)); 57 | *input = inputs[0]; 58 | } 59 | QueryNode::Filter { conditions, input } => { 60 | conditions.iter_mut().for_each(|e| *e = (self.rewrite)(e)); 61 | *input = inputs[0]; 62 | } 63 | QueryNode::TableScan { 64 | table_id: _, 65 | row_type: _, 66 | } => {} 67 | QueryNode::Join { 68 | join_type: _, 69 | conditions, 70 | left, 71 | right, 72 | } => { 73 | conditions.iter_mut().for_each(|e| *e = (self.rewrite)(e)); 74 | *left = inputs[0]; 75 | *right = inputs[1]; 76 | } 77 | QueryNode::Aggregate { 78 | group_key: _, 79 | aggregates: _, 80 | input, 81 | } => *input = inputs[0], 82 | QueryNode::Union { inputs: inputs_ref } => *inputs_ref = inputs.to_vec(), 83 | QueryNode::Apply { 84 | correlation: _, 85 | left, 86 | right, 87 | apply_type: _, 88 | } => { 89 | *left = inputs[0]; 90 | *right = inputs[1]; 91 | } 92 | QueryNode::QueryRoot { .. } | QueryNode::SubqueryRoot { .. 
} => { 93 | panic!("Root nodes cannot be cloned") 94 | } 95 | } 96 | self.stack.truncate(self.stack.len() - num_inputs); 97 | query_graph.add_node(cloned_node) 98 | } 99 | } 100 | 101 | impl<'a, P, R> QueryGraphPrePostVisitorMut for DeepCloner<'a, P, R> 102 | where 103 | P: Fn(&QueryGraph, NodeId) -> bool, 104 | R: FnMut(&ScalarExprRef) -> ScalarExprRef, 105 | { 106 | fn visit_pre( 107 | &mut self, 108 | query_graph: &mut QueryGraph, 109 | node_id: &mut NodeId, 110 | ) -> PreOrderVisitationResult { 111 | if (self.prune)(query_graph, *node_id) { 112 | self.skip_post = true; 113 | self.stack.push(*node_id); 114 | PreOrderVisitationResult::DoNotVisitInputs 115 | } else { 116 | PreOrderVisitationResult::VisitInputs 117 | } 118 | } 119 | 120 | fn visit_post( 121 | &mut self, 122 | query_graph: &mut QueryGraph, 123 | node_id: &mut NodeId, 124 | ) -> PostOrderVisitationResult { 125 | if self.skip_post { 126 | self.skip_post = false; 127 | } else { 128 | let cloned_node = self.clone_with_new_inputs(query_graph, *node_id); 129 | self.stack.push(cloned_node); 130 | } 131 | PostOrderVisitationResult::Continue 132 | } 133 | } 134 | 135 | #[cfg(test)] 136 | mod tests { 137 | 138 | use crate::{ 139 | query_graph::QueryGraph, 140 | scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef}, 141 | }; 142 | 143 | use super::deep_clone; 144 | 145 | /// Test that if no expression is rewritten the same node is returned. 146 | #[test] 147 | fn test_no_op() { 148 | let mut query_graph = QueryGraph::new(); 149 | let table_scan_id = query_graph.table_scan(0, 10); 150 | let filter_1: ScalarExprRef = ScalarExpr::input_ref(0) 151 | .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into()) 152 | .into(); 153 | let filter_id = query_graph.filter(table_scan_id, vec![filter_1.clone()]); 154 | let project_id = query_graph.project( 155 | filter_id, 156 | (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(), 157 | ); 158 | query_graph.set_entry_node(project_id); 159 | 160 | let cloned_project_id = deep_clone(&mut query_graph, project_id, &|_, _| false, &mut |e| { 161 | e.clone() 162 | }); 163 | assert_eq!(cloned_project_id, project_id); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /tests/testdata/explain/filter_project_transpose.test: -------------------------------------------------------------------------------- 1 | run 2 | filter_project_transpose_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 3 7 | - Row Type: string, string, string 8 | - Pulled Up Predicates: gt(ref_2, ref_1) 9 | [3] Filter [gt(ref_2, ref_1)] 10 | - Num Columns: 3 11 | - Row Type: string, string, string 12 | - Pulled Up Predicates: gt(ref_2, ref_1) 13 | [2] Project [ref_4, ref_2, ref_3] 14 | - Num Columns: 3 15 | - Row Type: string, string, string 16 | [1] TableScan id: 0 17 | - Num Columns: 5 18 | - Row Type: string, string, string, string, string 19 | 20 | 21 | Optimized: 22 | [0] QueryRoot 23 | - Num Columns: 3 24 | - Row Type: string, string, string 25 | - Pulled Up Predicates: gt(ref_2, ref_1) 26 | [6] Project [ref_4, ref_2, ref_3] 27 | - Num Columns: 3 28 | - Row Type: string, string, string 29 | - Pulled Up Predicates: gt(ref_2, ref_1) 30 | [5] Filter [gt(ref_3, ref_2)] 31 | - Num Columns: 5 32 | - Row Type: string, string, string, string, string 33 | - Pulled Up Predicates: gt(ref_3, ref_2) 34 | [1] TableScan id: 0 35 | - Num Columns: 5 36 | - Row Type: string, string, string, string, string 37 | 38 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 
3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"3","label":"[3] Filter [gt(ref_2, ref_1)]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"2","label":"[2] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 39 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"3","label":"[3] Filter [gt(ref_2, ref_1)]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"2","label":"[2] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"4","label":"TopProjectionRule"}]} 40 | step FilterProjectTransposeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"3","label":"[3] Filter [gt(ref_2, ref_1)]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"2","label":"[2] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"6","label":"[6] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"5","label":"[5] Filter [gt(ref_3, ref_2)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: gt(ref_3, ref_2)"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"3","to":"6","label":"FilterProjectTransposeRule"}]} 41 | step ProjectMergeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"6","label":"[6] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"5","label":"[5] Filter 
[gt(ref_3, ref_2)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: gt(ref_3, ref_2)"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"4","to":"6","label":"ProjectMergeRule"}]} 42 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"6","label":"[6] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"5","label":"[5] Filter [gt(ref_3, ref_2)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: gt(ref_3, ref_2)"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"}]} 43 | ---- 44 | ---- 45 | -------------------------------------------------------------------------------- /tests/testdata/explain/cte_discovery.test: -------------------------------------------------------------------------------- 1 | run rules=(RemovePassthroughProjectRule,CteDiscoveryRule) 2 | cte_discovery_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 5 7 | - Row Type: string, string, string, string, string 8 | - Pulled Up Predicates: lt(ref_0, ref_1) 9 | [5] Union 10 | - Num Columns: 5 11 | - Row Type: string, string, string, string, string 12 | - Pulled Up Predicates: lt(ref_0, ref_1) 13 | [2] Filter [lt(ref_0, ref_1)] 14 | - Num Columns: 5 15 | - Row Type: string, string, string, string, string 16 | - Pulled Up Predicates: lt(ref_0, ref_1) 17 | [1] TableScan id: 1 18 | - Num Columns: 5 19 | - Row Type: string, string, string, string, string 20 | [4] Filter [lt(ref_0, ref_1)] 21 | - Num Columns: 5 22 | - Row Type: string, string, string, string, string 23 | - Pulled Up Predicates: lt(ref_0, ref_1) 24 | [3] Project [ref_0, ref_1, ref_2, ref_3, ref_4] 25 | - Num Columns: 5 26 | - Row Type: string, string, string, string, string 27 | Recurring node 1 28 | 29 | 30 | Optimized: 31 | [0] QueryRoot 32 | - Num Columns: 5 33 | - Row Type: string, string, string, string, string 34 | - Pulled Up Predicates: lt(ref_0, ref_1) 35 | [5] Union 36 | - Num Columns: 5 37 | - Row Type: string, string, string, string, string 38 | - Pulled Up Predicates: lt(ref_0, ref_1) 39 | [2] Filter [lt(ref_0, ref_1)] 40 | - Num Columns: 5 41 | - Row Type: string, string, string, string, string 42 | - Pulled Up Predicates: lt(ref_0, ref_1) 43 | [1] TableScan id: 1 44 | - Num Columns: 5 45 | - Row Type: string, string, string, string, string 46 | Recurring node 2 47 | 48 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, 
ref_1)"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"1","label":"input 0"}]} 49 | step RemovePassthroughProjectRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"1","label":"input 0"},{"from":"3","to":"1","label":"RemovePassthroughProjectRule"}]} 50 | step CteDiscoveryRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"1","label":"input 0"},{"from":"4","to":"2","label":"CteDiscoveryRule"}]} 51 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, 
string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"5","to":"2","label":"input 1"},{"from":"2","to":"1","label":"input 0"}]} 52 | ---- 53 | ---- 54 | -------------------------------------------------------------------------------- /src/query_graph/properties/num_columns.rs: -------------------------------------------------------------------------------- 1 | use std::any::TypeId; 2 | 3 | use crate::{ 4 | query_graph::{visitor::QueryGraphPrePostVisitor, *}, 5 | visitor_utils::PreOrderVisitationResult, 6 | }; 7 | 8 | /// Returns the number of columns the given node projects, caching the result in the 9 | /// `QueryGraph` metadata. 10 | // TODO(asenac) this will eventually be renamed as `row_type`, returning the data types 11 | // of the columns projected by the given node. 12 | pub fn num_columns(query_graph: &QueryGraph, node_id: NodeId) -> usize { 13 | NumColumns::num_columns(query_graph, node_id) 14 | } 15 | 16 | /// Helper function to include column information when explaining the plan. 17 | pub fn num_columns_annotator(query_graph: &QueryGraph, node_id: NodeId) -> Option { 18 | let num_columns = num_columns(query_graph, node_id); 19 | Some(format!("Num Columns: {}", num_columns,)) 20 | } 21 | 22 | struct NumColumns {} 23 | 24 | impl NumColumns { 25 | fn num_columns(query_graph: &QueryGraph, node_id: NodeId) -> usize { 26 | let mut visitor = NumColumns {}; 27 | query_graph.visit_subgraph(&mut visitor, node_id); 28 | visitor.num_columns_unchecked(query_graph, node_id) 29 | } 30 | 31 | fn num_columns_unchecked(&self, query_graph: &QueryGraph, node_id: NodeId) -> usize { 32 | query_graph 33 | .property_cache 34 | .borrow_mut() 35 | .node_bottom_up_properties(node_id) 36 | .get(&Self::metadata_type_id()) 37 | .unwrap() 38 | .downcast_ref::() 39 | .unwrap() 40 | .clone() 41 | } 42 | 43 | fn metadata_type_id() -> TypeId { 44 | TypeId::of::() 45 | } 46 | 47 | fn compute_num_columns_for_node(&self, query_graph: &QueryGraph, node_id: NodeId) -> usize { 48 | match query_graph.node(node_id) { 49 | QueryNode::QueryRoot { input } => { 50 | if let Some(input) = input { 51 | self.num_columns_unchecked(query_graph, *input) 52 | } else { 53 | 0 54 | } 55 | } 56 | QueryNode::Project { outputs, .. } => outputs.len(), 57 | QueryNode::Filter { input, .. } | QueryNode::SubqueryRoot { input } => { 58 | self.num_columns_unchecked(query_graph, *input) 59 | } 60 | QueryNode::TableScan { row_type, .. } => row_type.len(), 61 | QueryNode::Join { 62 | join_type, 63 | left, 64 | right, 65 | .. 66 | } => { 67 | let left_columns = if join_type.projects_columns_from_left() { 68 | self.num_columns_unchecked(query_graph, *left) 69 | } else { 70 | 0 71 | }; 72 | let right_columns = if join_type.projects_columns_from_right() { 73 | self.num_columns_unchecked(query_graph, *right) 74 | } else { 75 | 0 76 | }; 77 | left_columns + right_columns 78 | } 79 | QueryNode::Aggregate { 80 | group_key, 81 | aggregates, 82 | .. 83 | } => group_key.len() + aggregates.len(), 84 | QueryNode::Union { inputs } => { 85 | if inputs.is_empty() { 86 | 0 87 | } else { 88 | self.num_columns_unchecked(query_graph, inputs[0]) 89 | } 90 | } 91 | QueryNode::Apply { left, right, .. 
} => { 92 | let left_columns = self.num_columns_unchecked(query_graph, *left); 93 | let right_columns = self.num_columns_unchecked(query_graph, *right); 94 | left_columns + right_columns 95 | } 96 | } 97 | } 98 | } 99 | 100 | impl QueryGraphPrePostVisitor for NumColumns { 101 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 102 | if query_graph 103 | .property_cache 104 | .borrow_mut() 105 | .node_bottom_up_properties(node_id) 106 | .contains_key(&Self::metadata_type_id()) 107 | { 108 | PreOrderVisitationResult::DoNotVisitInputs 109 | } else { 110 | PreOrderVisitationResult::VisitInputs 111 | } 112 | } 113 | 114 | fn visit_post(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 115 | if !query_graph 116 | .property_cache 117 | .borrow_mut() 118 | .node_bottom_up_properties(node_id) 119 | .contains_key(&Self::metadata_type_id()) 120 | { 121 | let num_columns = self.compute_num_columns_for_node(query_graph, node_id); 122 | query_graph 123 | .property_cache 124 | .borrow_mut() 125 | .node_bottom_up_properties(node_id) 126 | .insert(Self::metadata_type_id(), Box::new(num_columns)); 127 | } 128 | } 129 | } 130 | 131 | #[cfg(test)] 132 | mod tests { 133 | use crate::{ 134 | query_graph::QueryGraph, 135 | scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef}, 136 | }; 137 | 138 | use super::num_columns; 139 | 140 | #[test] 141 | fn test_num_columns() { 142 | let mut query_graph = QueryGraph::new(); 143 | let table_scan_id = query_graph.table_scan(0, 10); 144 | let filter_1: ScalarExprRef = ScalarExpr::input_ref(0) 145 | .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into()) 146 | .into(); 147 | let filter_id = query_graph.filter(table_scan_id, vec![filter_1.clone()]); 148 | let project_id = query_graph.project( 149 | filter_id, 150 | (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(), 151 | ); 152 | 153 | assert_eq!(num_columns(&query_graph, table_scan_id), 10); 154 | assert_eq!(num_columns(&query_graph, filter_id), 10); 155 | assert_eq!(num_columns(&query_graph, project_id), 5); 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/query_graph/properties/row_type.rs: -------------------------------------------------------------------------------- 1 | use std::{any::TypeId, rc::Rc}; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | data_type::DataType, 7 | query_graph::{visitor::QueryGraphPrePostVisitor, *}, 8 | visitor_utils::PreOrderVisitationResult, 9 | }; 10 | 11 | /// Returns the row type of the given node. 12 | pub fn row_type(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 13 | RowType::row_type(query_graph, node_id) 14 | } 15 | 16 | /// Given a join node returns the row type of the cross product of its operands. 17 | /// This is the row type the expressions in the join refer to. 18 | pub fn cross_product_row_type(query_graph: &QueryGraph, node_id: NodeId) -> Option> { 19 | if let QueryNode::Join { 20 | join_type: _, 21 | conditions: _, 22 | left, 23 | right, 24 | } = query_graph.node(node_id) 25 | { 26 | Some( 27 | row_type(query_graph, *left) 28 | .iter() 29 | .chain(row_type(query_graph, *right).iter()) 30 | .cloned() 31 | .collect_vec(), 32 | ) 33 | } else { 34 | None 35 | } 36 | } 37 | 38 | /// Helper function to include row type information when explaining the plan. 
39 | pub fn row_type_annotator(query_graph: &QueryGraph, node_id: NodeId) -> Option { 40 | let row_type = row_type(query_graph, node_id); 41 | Some(format!( 42 | "Row Type: {}", 43 | row_type 44 | .iter() 45 | .map(|data_type| format!("{}", data_type)) 46 | .collect::>() 47 | .join(", "), 48 | )) 49 | } 50 | 51 | struct RowType {} 52 | 53 | impl RowType { 54 | fn row_type(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 55 | let mut visitor = RowType {}; 56 | query_graph.visit_subgraph(&mut visitor, node_id); 57 | visitor.row_type_unchecked(query_graph, node_id) 58 | } 59 | 60 | fn row_type_unchecked(&self, query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 61 | query_graph 62 | .property_cache 63 | .borrow_mut() 64 | .node_bottom_up_properties(node_id) 65 | .get(&Self::metadata_type_id()) 66 | .unwrap() 67 | .downcast_ref::>>() 68 | .unwrap() 69 | .clone() 70 | } 71 | 72 | fn metadata_type_id() -> TypeId { 73 | TypeId::of::() 74 | } 75 | 76 | fn compute_row_type_for_node( 77 | &self, 78 | query_graph: &QueryGraph, 79 | node_id: NodeId, 80 | ) -> Rc> { 81 | match query_graph.node(node_id) { 82 | QueryNode::QueryRoot { input } => { 83 | if let Some(input) = input { 84 | self.row_type_unchecked(query_graph, *input) 85 | } else { 86 | Default::default() 87 | } 88 | } 89 | QueryNode::Project { outputs, input } => { 90 | let input_row_type = self.row_type_unchecked(query_graph, *input); 91 | outputs 92 | .iter() 93 | .map(|e| e.data_type(query_graph, &input_row_type[..])) 94 | .collect_vec() 95 | .into() 96 | } 97 | QueryNode::Filter { input, .. } | QueryNode::SubqueryRoot { input } => { 98 | self.row_type_unchecked(query_graph, *input) 99 | } 100 | QueryNode::TableScan { row_type, .. } => row_type.clone(), 101 | QueryNode::Join { 102 | join_type, 103 | left, 104 | right, 105 | .. 106 | } => match join_type { 107 | JoinType::Inner 108 | | JoinType::LeftOuter 109 | | JoinType::RightOuter 110 | | JoinType::FullOuter => self 111 | .row_type_unchecked(query_graph, *left) 112 | .iter() 113 | .chain(self.row_type_unchecked(query_graph, *right).iter()) 114 | .cloned() 115 | .collect_vec() 116 | .into(), 117 | JoinType::Semi | JoinType::Anti => self.row_type_unchecked(query_graph, *left), 118 | }, 119 | QueryNode::Aggregate { 120 | group_key, 121 | aggregates, 122 | input, 123 | } => { 124 | let input_row_type = self.row_type_unchecked(query_graph, *input); 125 | group_key 126 | .iter() 127 | .map(|e| input_row_type[*e].clone()) 128 | .chain(aggregates.iter().map(|agg| agg.data_type(&*input_row_type))) 129 | .collect_vec() 130 | .into() 131 | } 132 | QueryNode::Union { inputs } => { 133 | if inputs.is_empty() { 134 | Default::default() 135 | } else { 136 | self.row_type_unchecked(query_graph, inputs[0]) 137 | } 138 | } 139 | QueryNode::Apply { left, right, .. 
} => self 140 | .row_type_unchecked(query_graph, *left) 141 | .iter() 142 | .chain(self.row_type_unchecked(query_graph, *right).iter()) 143 | .cloned() 144 | .collect_vec() 145 | .into(), 146 | } 147 | } 148 | } 149 | 150 | impl QueryGraphPrePostVisitor for RowType { 151 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 152 | if query_graph 153 | .property_cache 154 | .borrow_mut() 155 | .node_bottom_up_properties(node_id) 156 | .contains_key(&Self::metadata_type_id()) 157 | { 158 | PreOrderVisitationResult::DoNotVisitInputs 159 | } else { 160 | PreOrderVisitationResult::VisitInputs 161 | } 162 | } 163 | 164 | fn visit_post(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 165 | if !query_graph 166 | .property_cache 167 | .borrow_mut() 168 | .node_bottom_up_properties(node_id) 169 | .contains_key(&Self::metadata_type_id()) 170 | { 171 | let row_type = self.compute_row_type_for_node(query_graph, node_id); 172 | query_graph 173 | .property_cache 174 | .borrow_mut() 175 | .node_bottom_up_properties(node_id) 176 | .insert(Self::metadata_type_id(), Box::new(row_type)); 177 | } 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /tests/testdata/explain/keys_filter.test: -------------------------------------------------------------------------------- 1 | run 2 | filter_keys_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 5 7 | - Row Type: string, string, string, string, string 8 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 9 | [2] Filter [FALSE] 10 | - Num Columns: 5 11 | - Row Type: string, string, string, string, string 12 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 13 | [1] TableScan id: 1 14 | - Num Columns: 5 15 | - Row Type: string, string, string, string, string 16 | 17 | 18 | Optimized: 19 | [0] QueryRoot 20 | - Num Columns: 5 21 | - Row Type: string, string, string, string, string 22 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 23 | [3] Project [ref_0, ref_1, ref_2, ref_3, ref_4] 24 | - Num Columns: 5 25 | - Row Type: string, string, string, string, string 26 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 27 | [2] Filter [FALSE] 28 | - Num Columns: 5 29 | - Row Type: string, string, string, string, string 30 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 31 | [1] TableScan id: 1 32 | - Num Columns: 5 33 | - Row Type: string, string, string, string, string 34 | 35 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [FALSE]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 36 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [FALSE]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, 
string"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]}],"edges":[{"from":"0","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"3","label":"TopProjectionRule"}]} 37 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [FALSE]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 38 | ---- 39 | ---- 40 | 41 | run 42 | filter_keys_2 43 | ---- 44 | ---- 45 | [0] QueryRoot 46 | - Num Columns: 5 47 | - Row Type: string, string, string, string, string 48 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 49 | [2] Filter [NULL] 50 | - Num Columns: 5 51 | - Row Type: string, string, string, string, string 52 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 53 | [1] TableScan id: 1 54 | - Num Columns: 5 55 | - Row Type: string, string, string, string, string 56 | 57 | 58 | Optimized: 59 | [0] QueryRoot 60 | - Num Columns: 5 61 | - Row Type: string, string, string, string, string 62 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 63 | [3] Project [ref_0, ref_1, ref_2, ref_3, ref_4] 64 | - Num Columns: 5 65 | - Row Type: string, string, string, string, string 66 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 67 | [2] Filter [NULL] 68 | - Num Columns: 5 69 | - Row Type: string, string, string, string, string 70 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 71 | [1] TableScan id: 1 72 | - Num Columns: 5 73 | - Row Type: string, string, string, string, string 74 | 75 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [NULL]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 76 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [NULL]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, 
string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]}],"edges":[{"from":"0","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"3","label":"TopProjectionRule"}]} 77 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [NULL]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 78 | ---- 79 | ---- 80 | -------------------------------------------------------------------------------- /src/query_graph/json.rs: -------------------------------------------------------------------------------- 1 | //! JSON serializer for generating visual representations of the plans. 2 | use std::collections::VecDeque; 3 | 4 | use crate::{ 5 | query_graph::{explain::explain_scalar_expr_vec, *}, 6 | scalar_expr::ScalarExpr, 7 | visitor_utils::PreOrderVisitationResult, 8 | }; 9 | 10 | use super::{ 11 | properties::{default_annotators, subqueries}, 12 | visitor::QueryGraphPrePostVisitor, 13 | }; 14 | 15 | pub struct JsonSerializer<'a> { 16 | annotators: Vec<&'a dyn Fn(&QueryGraph, NodeId) -> Option>, 17 | included_nodes: HashSet, 18 | graph: Graph, 19 | queue: VecDeque, 20 | } 21 | 22 | impl<'a> JsonSerializer<'a> { 23 | pub fn new(annotators: Vec<&'a dyn Fn(&QueryGraph, NodeId) -> Option>) -> Self { 24 | Self { 25 | annotators, 26 | included_nodes: HashSet::new(), 27 | graph: Graph::new(), 28 | queue: VecDeque::new(), 29 | } 30 | } 31 | 32 | pub fn new_with_all_annotators() -> Self { 33 | Self::new(default_annotators()) 34 | } 35 | 36 | /// Ensure the given subgraph is included in the output graph. 37 | pub fn add_subgraph(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 38 | self.queue.push_back(node_id); 39 | while let Some(node_id) = self.queue.pop_front() { 40 | query_graph.visit_subgraph(self, node_id); 41 | } 42 | } 43 | 44 | pub fn add_node_replacement( 45 | &mut self, 46 | query_graph: &QueryGraph, 47 | original_node: NodeId, 48 | replacement_node: NodeId, 49 | label: String, 50 | ) { 51 | query_graph.visit_subgraph(self, original_node); 52 | query_graph.visit_subgraph(self, replacement_node); 53 | self.graph.edges.push(Edge { 54 | from: original_node.to_string(), 55 | to: replacement_node.to_string(), 56 | label, 57 | }) 58 | } 59 | 60 | /// Finally, generate the JSON string. 61 | pub fn serialize(&self) -> Result { 62 | serde_json::to_string(&self.graph) 63 | } 64 | } 65 | 66 | impl<'a> QueryGraphPrePostVisitor for JsonSerializer<'a> { 67 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 68 | if !self.included_nodes.insert(node_id) { 69 | return PreOrderVisitationResult::DoNotVisitInputs; 70 | } 71 | let prefix = format!("[{}] ", node_id); 72 | let label = match query_graph.node(node_id) { 73 | QueryNode::QueryRoot { .. 
} => { 74 | format!("{}QueryRoot", prefix) 75 | } 76 | QueryNode::Project { outputs, .. } => { 77 | format!("{}Project [{}]", prefix, explain_scalar_expr_vec(outputs)) 78 | } 79 | QueryNode::Filter { conditions, .. } => { 80 | format!("{}Filter [{}]", prefix, explain_scalar_expr_vec(conditions),) 81 | } 82 | QueryNode::TableScan { table_id, .. } => { 83 | format!("{}TableScan id: {}", prefix, table_id) 84 | } 85 | QueryNode::Join { 86 | join_type, 87 | conditions, 88 | .. 89 | } => { 90 | format!( 91 | "{}{} Join [{}]", 92 | prefix, 93 | join_type, 94 | explain_scalar_expr_vec(conditions) 95 | ) 96 | } 97 | QueryNode::Aggregate { 98 | group_key, 99 | aggregates, 100 | .. 101 | } => format!( 102 | "{}Aggregate key: [{}], aggregates: [{}]", 103 | prefix, 104 | group_key 105 | .iter() 106 | .map(|e| format!("{}", ScalarExpr::input_ref(*e))) 107 | .collect::>() 108 | .join(", "), 109 | aggregates 110 | .iter() 111 | .map(|e| format!("{}", e)) 112 | .collect::>() 113 | .join(", "), 114 | ), 115 | QueryNode::Union { .. } => format!("{}Union", prefix), 116 | QueryNode::SubqueryRoot { .. } => format!("{}SubqueryRoot", prefix), 117 | QueryNode::Apply { 118 | correlation, 119 | apply_type, 120 | .. 121 | } => { 122 | format!( 123 | "{}{} Apply parameters: [{}]", 124 | prefix, 125 | apply_type, 126 | explain_scalar_expr_vec(&correlation.parameters), 127 | ) 128 | } 129 | }; 130 | let mut annotations = Vec::new(); 131 | for annotator in self.annotators.iter() { 132 | if let Some(annotation) = (annotator)(query_graph, node_id) { 133 | annotations.push(annotation); 134 | } 135 | } 136 | self.graph.nodes.push(Node { 137 | id: node_id.to_string(), 138 | label: label, 139 | annotations, 140 | }); 141 | let node = query_graph.node(node_id); 142 | for i in 0..node.num_inputs() { 143 | let to = node.get_input(i); 144 | self.graph.edges.push(Edge { 145 | from: node_id.to_string(), 146 | to: to.to_string(), 147 | label: format!("input {}", i), 148 | }); 149 | } 150 | 151 | // Link the current node with the subqueries it references 152 | let subqueries = subqueries(query_graph, node_id); 153 | for subquery_root in subqueries.iter() { 154 | self.queue.push_back(*subquery_root); 155 | self.graph.edges.push(Edge { 156 | from: node_id.to_string(), 157 | to: subquery_root.to_string(), 158 | label: format!("subquery({})", subquery_root), 159 | }); 160 | } 161 | return PreOrderVisitationResult::VisitInputs; 162 | } 163 | 164 | fn visit_post(&mut self, _: &QueryGraph, _: NodeId) {} 165 | } 166 | 167 | #[derive(Serialize, Deserialize)] 168 | pub struct Node { 169 | id: String, 170 | label: String, 171 | annotations: Vec, 172 | } 173 | 174 | #[derive(Serialize, Deserialize)] 175 | pub struct Edge { 176 | from: String, 177 | to: String, 178 | label: String, 179 | } 180 | 181 | #[derive(Serialize, Deserialize)] 182 | pub struct Graph { 183 | nodes: Vec, 184 | edges: Vec, 185 | } 186 | 187 | impl Graph { 188 | fn new() -> Self { 189 | Self { 190 | nodes: Vec::new(), 191 | edges: Vec::new(), 192 | } 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /tests/testdata/explain/union_pruning.test: -------------------------------------------------------------------------------- 1 | run 2 | union_pruning 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 2 7 | - Row Type: string, string 8 | [5] Union 9 | - Num Columns: 2 10 | - Row Type: string, string 11 | [3] Project [ref_0, ref_2] 12 | - Num Columns: 2 13 | - Row Type: string, string 14 | [2] Union 15 | - Num Columns: 10 16 
| - Row Type: string, string, string, string, string, string, string, string, string, string 17 | [1] TableScan id: 1 18 | - Num Columns: 10 19 | - Row Type: string, string, string, string, string, string, string, string, string, string 20 | Recurring node 1 21 | [4] Project [ref_3, ref_2] 22 | - Num Columns: 2 23 | - Row Type: string, string 24 | Recurring node 2 25 | 26 | 27 | Optimized: 28 | [0] QueryRoot 29 | - Num Columns: 2 30 | - Row Type: string, string 31 | [6] Project [ref_0, ref_1] 32 | - Num Columns: 2 33 | - Row Type: string, string 34 | [5] Union 35 | - Num Columns: 2 36 | - Row Type: string, string 37 | [9] Project [ref_0, ref_1] 38 | - Num Columns: 2 39 | - Row Type: string, string 40 | [8] Union 41 | - Num Columns: 3 42 | - Row Type: string, string, string 43 | [7] Project [ref_0, ref_2, ref_3] 44 | - Num Columns: 3 45 | - Row Type: string, string, string 46 | [1] TableScan id: 1 47 | - Num Columns: 10 48 | - Row Type: string, string, string, string, string, string, string, string, string, string 49 | Recurring node 7 50 | [10] Project [ref_2, ref_1] 51 | - Num Columns: 2 52 | - Row Type: string, string 53 | Recurring node 8 54 | 55 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"3","label":"[3] Project [ref_0, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_3, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"3","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"4","to":"2","label":"input 0"}]} 56 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"3","label":"[3] Project [ref_0, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_3, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"6","label":"[6] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"3","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"4","to":"2","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"6","label":"TopProjectionRule"}]} 57 | step UnionPruningRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num 
Columns: 2","Row Type: string, string"]},{"id":"6","label":"[6] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"3","label":"[3] Project [ref_0, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_3, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"9","label":"[9] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"8","label":"[8] Union","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"7","label":"[7] Project [ref_0, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"10","label":"[10] Project [ref_2, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]}],"edges":[{"from":"0","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"3","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"4","to":"2","label":"input 0"},{"from":"9","to":"8","label":"input 0"},{"from":"8","to":"7","label":"input 0"},{"from":"8","to":"7","label":"input 1"},{"from":"7","to":"1","label":"input 0"},{"from":"3","to":"9","label":"UnionPruningRule"},{"from":"10","to":"8","label":"input 0"},{"from":"4","to":"10","label":"UnionPruningRule"}]} 58 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"6","label":"[6] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"9","label":"[9] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"8","label":"[8] Union","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"7","label":"[7] Project [ref_0, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"10","label":"[10] Project [ref_2, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]}],"edges":[{"from":"0","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"9","label":"input 0"},{"from":"5","to":"10","label":"input 1"},{"from":"9","to":"8","label":"input 0"},{"from":"8","to":"7","label":"input 0"},{"from":"8","to":"7","label":"input 1"},{"from":"7","to":"1","label":"input 0"},{"from":"10","to":"8","label":"input 0"}]} 59 | ---- 60 | ---- 61 | -------------------------------------------------------------------------------- /tests/testdata/explain/union_merge.test: -------------------------------------------------------------------------------- 1 | run 2 | union_merge 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 10 7 | - Row Type: string, string, string, string, string, string, string, string, string, string 8 | 
[4] Union 9 | - Num Columns: 10 10 | - Row Type: string, string, string, string, string, string, string, string, string, string 11 | [2] Union 12 | - Num Columns: 10 13 | - Row Type: string, string, string, string, string, string, string, string, string, string 14 | [1] TableScan id: 1 15 | - Num Columns: 10 16 | - Row Type: string, string, string, string, string, string, string, string, string, string 17 | Recurring node 1 18 | [3] Union 19 | - Num Columns: 10 20 | - Row Type: string, string, string, string, string, string, string, string, string, string 21 | Recurring node 2 22 | Recurring node 2 23 | Recurring node 1 24 | 25 | 26 | Optimized: 27 | [0] QueryRoot 28 | - Num Columns: 10 29 | - Row Type: string, string, string, string, string, string, string, string, string, string 30 | [5] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9] 31 | - Num Columns: 10 32 | - Row Type: string, string, string, string, string, string, string, string, string, string 33 | [6] Union 34 | - Num Columns: 10 35 | - Row Type: string, string, string, string, string, string, string, string, string, string 36 | [1] TableScan id: 1 37 | - Num Columns: 10 38 | - Row Type: string, string, string, string, string, string, string, string, string, string 39 | Recurring node 1 40 | Recurring node 1 41 | Recurring node 1 42 | Recurring node 1 43 | Recurring node 1 44 | Recurring node 1 45 | 46 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"3","label":"[3] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"4","to":"3","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"3","to":"2","label":"input 1"},{"from":"3","to":"1","label":"input 2"}]} 47 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"3","label":"[3] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, 
string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"4","to":"3","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"3","to":"2","label":"input 1"},{"from":"3","to":"1","label":"input 2"},{"from":"5","to":"4","label":"input 0"},{"from":"4","to":"5","label":"TopProjectionRule"}]} 48 | step UnionMergeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"3","label":"[3] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"6","label":"[6] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"4","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"4","to":"3","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"3","to":"2","label":"input 1"},{"from":"3","to":"1","label":"input 2"},{"from":"6","to":"1","label":"input 0"},{"from":"6","to":"1","label":"input 1"},{"from":"6","to":"1","label":"input 2"},{"from":"6","to":"1","label":"input 3"},{"from":"6","to":"1","label":"input 4"},{"from":"6","to":"1","label":"input 5"},{"from":"6","to":"1","label":"input 6"},{"from":"4","to":"6","label":"UnionMergeRule"}]} 49 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"6","label":"[6] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"6","label":"input 0"},{"from":"6","to":"1","label":"input 0"},{"from":"6","to":"1","label":"input 1"},{"from":"6","to":"1","label":"input 2"},{"from":"6","to":"1","label":"input 3"},{"from":"6","to":"1","label":"input 4"},{"from":"6","to":"1","label":"input 5"},{"from":"6","to":"1","label":"input 6"}]} 50 | ---- 51 | ---- 52 | 
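
Editor's note: the `initial`, `step` and `final` lines recorded in the explain *.test files in this listing are the output of the `JsonSerializer` defined in src/query_graph/json.rs earlier in this dump, which its module comment describes as a JSON serializer for generating visual representations of the plans. The following is a small, hypothetical usage sketch, not part of the repository: it assumes the library is consumed as the `rust_sql` crate and that module paths mirror the source layout, and it only relies on builder and serializer calls that appear elsewhere in this listing (`QueryGraph::new`, `table_scan`, `filter`, `project`, `ScalarExpr::input_ref`, `JsonSerializer::new_with_all_annotators`, `add_subgraph`, `serialize`).

use rust_sql::query_graph::{json::JsonSerializer, QueryGraph};
use rust_sql::scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef};

fn main() {
    // Build a tiny plan: Project (5 cols) <- Filter (ref_0 = ref_1) <- TableScan (10 cols),
    // mirroring the graph constructed in the num_columns unit test above.
    let mut query_graph = QueryGraph::new();
    let table_scan_id = query_graph.table_scan(0, 10);
    let condition: ScalarExprRef = ScalarExpr::input_ref(0)
        .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into())
        .into();
    let filter_id = query_graph.filter(table_scan_id, vec![condition]);
    let project_id = query_graph.project(
        filter_id,
        (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(),
    );

    // Serialize the subgraph rooted at the projection into the same
    // {"nodes": [...], "edges": [...]} shape that the test expectations record;
    // the default annotators add entries such as "Num Columns" and "Row Type".
    let mut serializer = JsonSerializer::new_with_all_annotators();
    serializer.add_subgraph(&query_graph, project_id);
    println!("{}", serializer.serialize().expect("JSON serialization failed"));
}

The resulting string is the same node/edge JSON document that the data-driven tests store after each optimizer step.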
-------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/common_aggregate_discovery.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeSet, HashMap}; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{OptRuleType, Rule}, 8 | NodeId, QueryGraph, QueryNode, 9 | }, 10 | scalar_expr::{ 11 | rewrite::{dereference_extended_scalar_expr, dereference_scalar_expr}, 12 | AggregateExpr, ExtendedScalarExpr, ExtendedScalarExprRef, ScalarExpr, ScalarExprRef, 13 | ToExtendedExpr, ToScalarExpr, 14 | }, 15 | }; 16 | 17 | #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] 18 | struct AggregateKey { 19 | input: NodeId, 20 | group_key: Vec, 21 | } 22 | 23 | struct AggregateValue { 24 | agg_node_id: NodeId, 25 | aggregates: Vec, 26 | } 27 | 28 | /// Rule that folds aggregates over the same input with the same grouping key into a single 29 | /// shared aggregate node. 30 | pub struct CommonAggregateDiscoveryRule {} 31 | 32 | impl Rule for CommonAggregateDiscoveryRule { 33 | fn rule_type(&self) -> OptRuleType { 34 | OptRuleType::RootOnly 35 | } 36 | 37 | fn apply(&self, query_graph: &mut QueryGraph, _: NodeId) -> Option> { 38 | // Collect and classify all the aggregate nodes in the query graph 39 | let mut classified_aggregates: HashMap> = HashMap::new(); 40 | for node_id in query_graph.nodes.keys().sorted() { 41 | if let QueryNode::Aggregate { 42 | group_key, 43 | aggregates, 44 | input, 45 | } = query_graph.node(*node_id) 46 | { 47 | let mut group_key = group_key 48 | .iter() 49 | .map(|i| ScalarExpr::InputRef { index: *i }.into()) 50 | .collect_vec(); 51 | let mut aggregates = aggregates 52 | .iter() 53 | .map(|agg| agg.to_extended_expr()) 54 | .collect_vec(); 55 | let mut normalized_input = *input; 56 | // Let's absorb projections so that we can still fold the following 57 | // two aggregations into a single one: 58 | // 59 | // Aggregate key[ref_0], Aggregates[max(ref_1)] 60 | // Project ref_0, ref_1 + ref_2 61 | // Shared node X 62 | // 63 | // Aggregate key[ref_0], Aggregates[max(ref_1)] 64 | // Project ref_0, ref_1 + ref_3 65 | // Shared node X 66 | // 67 | // The resulting aggregate will be: 68 | // 69 | // Aggregate key[ref_0], Aggregates[max(ref_1), max(ref_2] 70 | // Project ref_0, ref_1 + ref_2, ref_1 + ref_3 71 | // Shared node X 72 | while let QueryNode::Project { outputs, input } = query_graph.node(normalized_input) 73 | { 74 | let extended_outputs = 75 | outputs.iter().map(|e| e.to_extended_expr()).collect_vec(); 76 | for key in group_key.iter_mut() { 77 | *key = dereference_scalar_expr(key, &outputs); 78 | } 79 | for agg in aggregates.iter_mut() { 80 | *agg = dereference_extended_scalar_expr(agg, &extended_outputs); 81 | } 82 | normalized_input = *input; 83 | } 84 | 85 | classified_aggregates 86 | .entry(AggregateKey { 87 | input: normalized_input, 88 | group_key, 89 | }) 90 | .or_insert_with(|| Vec::new()) 91 | .push(AggregateValue { 92 | agg_node_id: *node_id, 93 | aggregates, 94 | }) 95 | } 96 | } 97 | let mut result: Option> = None; 98 | let mut it = classified_aggregates.iter().filter(|(_, v)| v.len() > 1); 99 | while let Some((key, values)) = it.next() { 100 | let new_group_key = (0..key.group_key.len()).collect::>(); 101 | let mut input_project = key.group_key.clone(); 102 | let all_aggregates = values 103 | .iter() 104 | .map(|v| v.aggregates.iter()) 105 | .flatten() 106 | .sorted() 107 | .dedup() 108 | .collect_vec(); 109 | 
let new_aggregates = all_aggregates 110 | .iter() 111 | .map(|a| match a.as_ref() { 112 | ExtendedScalarExpr::Aggregate { op, operands } => { 113 | let operands = operands 114 | .iter() 115 | .map(|o| { 116 | append_to_vector_if_not_present( 117 | &mut input_project, 118 | o.to_scalar_expr().unwrap(), 119 | ) 120 | }) 121 | .collect_vec(); 122 | AggregateExpr { 123 | op: op.clone(), 124 | operands, 125 | } 126 | .into() 127 | } 128 | _ => panic!(), 129 | }) 130 | .collect_vec(); 131 | let input = query_graph.project(key.input, input_project); 132 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 133 | group_key: new_group_key, 134 | aggregates: new_aggregates, 135 | input, 136 | }); 137 | for value in values.iter() { 138 | let project = (0..key.group_key.len()) 139 | .chain(value.aggregates.iter().map(|a| { 140 | key.group_key.len() 141 | + all_aggregates 142 | .iter() 143 | .enumerate() 144 | .find_map(|(i, o)| if *a == **o { Some(i) } else { None }) 145 | // the aggregate must be present in the list of aggregates 146 | .unwrap() 147 | })) 148 | .map(|i| ScalarExpr::input_ref(i).into()) 149 | .collect_vec(); 150 | let new_project = query_graph.project(new_aggregate, project); 151 | result 152 | .get_or_insert_with(|| Vec::new()) 153 | .push((value.agg_node_id, new_project)); 154 | } 155 | } 156 | result 157 | } 158 | } 159 | 160 | fn append_to_vector_if_not_present(vec: &mut Vec, e: E) -> usize { 161 | if let Some(index) = vec 162 | .iter() 163 | .enumerate() 164 | .find_map(|(i, o)| if e == *o { Some(i) } else { None }) 165 | { 166 | index 167 | } else { 168 | vec.push(e); 169 | vec.len() - 1 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/query_graph/explain.rs: -------------------------------------------------------------------------------- 1 | use crate::query_graph::visitor::*; 2 | use crate::query_graph::*; 3 | use crate::scalar_expr::ScalarExpr; 4 | use crate::visitor_utils::PreOrderVisitationResult; 5 | 6 | use super::properties::default_annotators; 7 | 8 | /// Utility for explaining a query graph. 9 | pub struct Explainer<'a> { 10 | pub(super) query_graph: &'a QueryGraph, 11 | pub(super) annotators: Vec<&'a dyn Fn(&QueryGraph, NodeId) -> Option>, 12 | pub(super) leaves: HashSet, 13 | pub(super) entry_point: NodeId, 14 | } 15 | 16 | impl<'a> Explainer<'a> { 17 | pub fn new(query_graph: &'a QueryGraph) -> Self { 18 | Self { 19 | query_graph, 20 | leaves: HashSet::new(), 21 | annotators: Vec::new(), 22 | entry_point: QueryGraph::ROOT_NODE_ID, 23 | } 24 | } 25 | 26 | pub fn with_all_annotators(self) -> Self { 27 | Self { 28 | query_graph: self.query_graph, 29 | leaves: self.leaves, 30 | annotators: default_annotators(), 31 | entry_point: self.entry_point, 32 | } 33 | } 34 | 35 | pub fn with_annotators( 36 | self, 37 | annotators: Vec<&'a dyn Fn(&QueryGraph, NodeId) -> Option>, 38 | ) -> Self { 39 | Self { 40 | query_graph: self.query_graph, 41 | leaves: self.leaves, 42 | annotators, 43 | entry_point: self.entry_point, 44 | } 45 | } 46 | 47 | /// Treat the given nodes as leaves in the explain plan. 48 | pub fn with_leaves(self, leaves: HashSet) -> Self { 49 | Self { 50 | query_graph: self.query_graph, 51 | leaves, 52 | annotators: self.annotators, 53 | entry_point: self.entry_point, 54 | } 55 | } 56 | 57 | /// Override the entry point for the explain plan. 
58 | pub fn with_entry_point(self, entry_point: NodeId) -> Self { 59 | Self { 60 | query_graph: self.query_graph, 61 | leaves: self.leaves, 62 | annotators: self.annotators, 63 | entry_point, 64 | } 65 | } 66 | 67 | /// Generate the explain plan. 68 | pub fn explain(&self) -> String { 69 | let mut explain = ExplainVisitor::new(self); 70 | self.query_graph 71 | .visit_subgraph(&mut explain, self.entry_point); 72 | let subquery_roots = self.query_graph.subquery_roots(); 73 | for subquery_root in subquery_roots { 74 | explain.result += "\n"; 75 | self.query_graph.visit_subgraph(&mut explain, subquery_root); 76 | } 77 | explain.result 78 | } 79 | } 80 | 81 | /// Explain functions. 82 | impl QueryGraph { 83 | /// Returns a stringified version of the query graph. 84 | pub fn explain(&self) -> String { 85 | Explainer::new(&self).explain() 86 | } 87 | 88 | // Explains the query graph annotated with all available properties. 89 | pub fn fully_annotated_explain(&self) -> String { 90 | Explainer::new(self).with_all_annotators().explain() 91 | } 92 | } 93 | 94 | struct ExplainVisitor<'a> { 95 | indentation: usize, 96 | visited_nodes: HashSet, 97 | result: String, 98 | options: &'a Explainer<'a>, 99 | } 100 | 101 | impl<'a> ExplainVisitor<'a> { 102 | fn new(options: &'a Explainer) -> Self { 103 | Self { 104 | indentation: 0, 105 | visited_nodes: HashSet::new(), 106 | result: String::new(), 107 | options, 108 | } 109 | } 110 | } 111 | 112 | impl<'a> QueryGraphPrePostVisitor for ExplainVisitor<'a> { 113 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 114 | let line_prefix = (0..2 * self.indentation).map(|_| ' ').collect::(); 115 | self.indentation += 1; 116 | if !self.visited_nodes.insert(node_id) { 117 | self.result += &format!("{}Recurring node {}\n", line_prefix, node_id); 118 | return PreOrderVisitationResult::DoNotVisitInputs; 119 | } 120 | let prefix = format!("{}[{}] ", line_prefix, node_id); 121 | let node = match query_graph.node(node_id) { 122 | QueryNode::QueryRoot { .. } => { 123 | format!("{}QueryRoot\n", prefix) 124 | } 125 | QueryNode::Project { outputs, .. } => { 126 | format!("{}Project [{}]\n", prefix, explain_scalar_expr_vec(outputs)) 127 | } 128 | QueryNode::Filter { conditions, .. } => { 129 | format!( 130 | "{}Filter [{}]\n", 131 | prefix, 132 | explain_scalar_expr_vec(conditions), 133 | ) 134 | } 135 | QueryNode::TableScan { table_id, .. } => { 136 | format!("{}TableScan id: {}\n", prefix, table_id) 137 | } 138 | QueryNode::Join { 139 | join_type, 140 | conditions, 141 | .. 142 | } => { 143 | format!( 144 | "{}{} Join [{}]\n", 145 | prefix, 146 | join_type, 147 | explain_scalar_expr_vec(conditions) 148 | ) 149 | } 150 | QueryNode::Aggregate { 151 | group_key, 152 | aggregates, 153 | .. 154 | } => format!( 155 | "{}Aggregate key: [{}], aggregates: [{}]\n", 156 | prefix, 157 | group_key 158 | .iter() 159 | .map(|e| format!("{}", ScalarExpr::input_ref(*e))) 160 | .collect::>() 161 | .join(", "), 162 | aggregates 163 | .iter() 164 | .map(|e| format!("{}", e)) 165 | .collect::>() 166 | .join(", "), 167 | ), 168 | QueryNode::Union { .. } => format!("{}Union\n", prefix), 169 | QueryNode::SubqueryRoot { .. } => format!("{}SubqueryRoot\n", prefix), 170 | QueryNode::Apply { 171 | correlation, 172 | apply_type, 173 | .. 
174 | } => { 175 | format!( 176 | "{}{} Apply parameters: [{}]\n", 177 | prefix, 178 | apply_type, 179 | explain_scalar_expr_vec(&correlation.parameters), 180 | ) 181 | } 182 | }; 183 | self.result += &node; 184 | 185 | for annotator in self.options.annotators.iter() { 186 | if let Some(annotation) = (annotator)(query_graph, node_id) { 187 | self.result += format!("{} - {}\n", line_prefix, annotation).as_str(); 188 | } 189 | } 190 | 191 | if self.options.leaves.contains(&node_id) { 192 | PreOrderVisitationResult::DoNotVisitInputs 193 | } else { 194 | PreOrderVisitationResult::VisitInputs 195 | } 196 | } 197 | 198 | fn visit_post(&mut self, _: &QueryGraph, _: NodeId) { 199 | self.indentation -= 1; 200 | } 201 | } 202 | 203 | pub(crate) fn explain_scalar_expr_vec(vec: &Vec) -> String { 204 | vec.iter() 205 | .map(|e| format!("{}", e)) 206 | .collect::>() 207 | .join(", ") 208 | } 209 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.1.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 10 | 11 | [[package]] 12 | name = "datadriven" 13 | version = "0.7.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "df18f0e7700f33562d92872dae54641b07bba9dc6a68faa40bf77c6a19ad6f97" 16 | dependencies = [ 17 | "futures", 18 | "thiserror", 19 | ] 20 | 21 | [[package]] 22 | name = "either" 23 | version = "1.8.1" 24 | source = "registry+https://github.com/rust-lang/crates.io-index" 25 | checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" 26 | 27 | [[package]] 28 | name = "futures" 29 | version = "0.3.28" 30 | source = "registry+https://github.com/rust-lang/crates.io-index" 31 | checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" 32 | dependencies = [ 33 | "futures-channel", 34 | "futures-core", 35 | "futures-executor", 36 | "futures-io", 37 | "futures-sink", 38 | "futures-task", 39 | "futures-util", 40 | ] 41 | 42 | [[package]] 43 | name = "futures-channel" 44 | version = "0.3.28" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" 47 | dependencies = [ 48 | "futures-core", 49 | "futures-sink", 50 | ] 51 | 52 | [[package]] 53 | name = "futures-core" 54 | version = "0.3.28" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" 57 | 58 | [[package]] 59 | name = "futures-executor" 60 | version = "0.3.28" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" 63 | dependencies = [ 64 | "futures-core", 65 | "futures-task", 66 | "futures-util", 67 | ] 68 | 69 | [[package]] 70 | name = "futures-io" 71 | version = "0.3.28" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" 74 | 75 | [[package]] 76 | name = "futures-macro" 77 | version = "0.3.28" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = 
"89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" 80 | dependencies = [ 81 | "proc-macro2", 82 | "quote", 83 | "syn", 84 | ] 85 | 86 | [[package]] 87 | name = "futures-sink" 88 | version = "0.3.28" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" 91 | 92 | [[package]] 93 | name = "futures-task" 94 | version = "0.3.28" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" 97 | 98 | [[package]] 99 | name = "futures-util" 100 | version = "0.3.28" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" 103 | dependencies = [ 104 | "futures-channel", 105 | "futures-core", 106 | "futures-io", 107 | "futures-macro", 108 | "futures-sink", 109 | "futures-task", 110 | "memchr", 111 | "pin-project-lite", 112 | "pin-utils", 113 | "slab", 114 | ] 115 | 116 | [[package]] 117 | name = "itertools" 118 | version = "0.11.0" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" 121 | dependencies = [ 122 | "either", 123 | ] 124 | 125 | [[package]] 126 | name = "itoa" 127 | version = "1.0.9" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" 130 | 131 | [[package]] 132 | name = "lazy_static" 133 | version = "1.4.0" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 136 | 137 | [[package]] 138 | name = "memchr" 139 | version = "2.5.0" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 142 | 143 | [[package]] 144 | name = "pin-project-lite" 145 | version = "0.2.10" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57" 148 | 149 | [[package]] 150 | name = "pin-utils" 151 | version = "0.1.0" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 154 | 155 | [[package]] 156 | name = "proc-macro2" 157 | version = "1.0.66" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" 160 | dependencies = [ 161 | "unicode-ident", 162 | ] 163 | 164 | [[package]] 165 | name = "quote" 166 | version = "1.0.31" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" 169 | dependencies = [ 170 | "proc-macro2", 171 | ] 172 | 173 | [[package]] 174 | name = "rust-sql" 175 | version = "0.1.0" 176 | dependencies = [ 177 | "datadriven", 178 | "itertools", 179 | "lazy_static", 180 | "serde", 181 | "serde_derive", 182 | "serde_json", 183 | ] 184 | 185 | [[package]] 186 | name = "ryu" 187 | version = "1.0.15" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" 190 | 191 | [[package]] 192 | name = "serde" 193 | version = 
"1.0.171" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" 196 | 197 | [[package]] 198 | name = "serde_derive" 199 | version = "1.0.171" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" 202 | dependencies = [ 203 | "proc-macro2", 204 | "quote", 205 | "syn", 206 | ] 207 | 208 | [[package]] 209 | name = "serde_json" 210 | version = "1.0.103" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" 213 | dependencies = [ 214 | "itoa", 215 | "ryu", 216 | "serde", 217 | ] 218 | 219 | [[package]] 220 | name = "slab" 221 | version = "0.4.8" 222 | source = "registry+https://github.com/rust-lang/crates.io-index" 223 | checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" 224 | dependencies = [ 225 | "autocfg", 226 | ] 227 | 228 | [[package]] 229 | name = "syn" 230 | version = "2.0.26" 231 | source = "registry+https://github.com/rust-lang/crates.io-index" 232 | checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" 233 | dependencies = [ 234 | "proc-macro2", 235 | "quote", 236 | "unicode-ident", 237 | ] 238 | 239 | [[package]] 240 | name = "thiserror" 241 | version = "1.0.43" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" 244 | dependencies = [ 245 | "thiserror-impl", 246 | ] 247 | 248 | [[package]] 249 | name = "thiserror-impl" 250 | version = "1.0.43" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" 253 | dependencies = [ 254 | "proc-macro2", 255 | "quote", 256 | "syn", 257 | ] 258 | 259 | [[package]] 260 | name = "unicode-ident" 261 | version = "1.0.11" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" 264 | -------------------------------------------------------------------------------- /src/query_graph/properties/correlated_input_refs.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | any::TypeId, 3 | collections::{BTreeSet, HashMap}, 4 | rc::Rc, 5 | }; 6 | 7 | use itertools::Itertools; 8 | 9 | use crate::{ 10 | query_graph::{visitor::QueryGraphPrePostVisitor, NodeId, QueryGraph, QueryNode}, 11 | scalar_expr::{visitor::visit_expr_pre, ScalarExpr}, 12 | visitor_utils::PreOrderVisitationResult, 13 | }; 14 | 15 | struct CorrelatedInputRefsTag; 16 | 17 | /// Returns a set with the correlated input refs the node contains, if any. 
18 | pub fn node_correlated_input_refs( 19 | query_graph: &QueryGraph, 20 | node_id: NodeId, 21 | ) -> Rc>> { 22 | let type_id = TypeId::of::(); 23 | if let Some(cached) = query_graph 24 | .property_cache 25 | .borrow_mut() 26 | .single_node_properties(node_id) 27 | .get(&type_id) 28 | { 29 | return cached 30 | .downcast_ref::>>>() 31 | .unwrap() 32 | .clone(); 33 | } 34 | let mut correlated_cols = HashMap::new(); 35 | let query_node = query_graph.node(node_id); 36 | query_node.visit_scalar_expr(&mut |expr| { 37 | visit_expr_pre(expr, &mut |curr_expr| { 38 | match curr_expr.as_ref() { 39 | ScalarExpr::CorrelatedInputRef { 40 | context_offset, 41 | index, 42 | .. 43 | } => { 44 | correlated_cols 45 | .entry(*context_offset) 46 | .or_insert_with(|| BTreeSet::new()) 47 | .insert(*index); 48 | } 49 | ScalarExpr::ExistsSubquery { subquery } 50 | | ScalarExpr::ScalarSubquery { subquery } 51 | | ScalarExpr::ScalarSubqueryCmp { subquery, .. } => { 52 | let subquery_correlated_input_refs = 53 | subgraph_correlated_input_refs(query_graph, subquery.root); 54 | if subquery.correlation.is_some() { 55 | let subquery_external_correlated_input_refs = 56 | subquery_correlated_input_refs 57 | .iter() 58 | .filter(|(offset, _)| **offset > 0) 59 | .map(|(offset, columns)| (offset - 1, columns.clone())) 60 | .collect::>>(); 61 | merge_correlated_maps( 62 | subquery_external_correlated_input_refs.iter(), 63 | &mut correlated_cols, 64 | ); 65 | } else { 66 | merge_correlated_maps( 67 | subquery_correlated_input_refs.iter(), 68 | &mut correlated_cols, 69 | ); 70 | } 71 | } 72 | _ => (), 73 | } 74 | PreOrderVisitationResult::VisitInputs 75 | }); 76 | }); 77 | 78 | // Store the property in the cache 79 | let correlated_cols = Rc::new(correlated_cols); 80 | query_graph 81 | .property_cache 82 | .borrow_mut() 83 | .single_node_properties(node_id) 84 | .insert(type_id, Box::new(correlated_cols.clone())); 85 | correlated_cols 86 | } 87 | 88 | /// Returns a set with the correlated input refs in the given subplan that escape 89 | /// the context of the subplan. 
90 | pub fn subgraph_correlated_input_refs( 91 | query_graph: &QueryGraph, 92 | node_id: NodeId, 93 | ) -> Rc>> { 94 | SubgraphCorrelatedInputRefs::subgraph_correlated_input_refs(query_graph, node_id) 95 | } 96 | 97 | pub fn subgraph_correlated_input_refs_annotator( 98 | query_graph: &QueryGraph, 99 | node_id: NodeId, 100 | ) -> Option { 101 | let correlated_cols = subgraph_correlated_input_refs(query_graph, node_id); 102 | let correlated_cols = correlated_cols 103 | .iter() 104 | .sorted() 105 | .map(|(offset, columns)| { 106 | columns 107 | .iter() 108 | .map(|column| format!("ctx_{}.ref_{}", *offset, column)) 109 | }) 110 | .flatten() 111 | .join(", "); 112 | if correlated_cols.is_empty() { 113 | None 114 | } else { 115 | Some(format!("Correlated References: {}", correlated_cols)) 116 | } 117 | } 118 | 119 | struct SubgraphCorrelatedInputRefs {} 120 | 121 | impl SubgraphCorrelatedInputRefs { 122 | fn subgraph_correlated_input_refs( 123 | query_graph: &QueryGraph, 124 | node_id: NodeId, 125 | ) -> Rc>> { 126 | let mut visitor = SubgraphCorrelatedInputRefs {}; 127 | query_graph.visit_subgraph(&mut visitor, node_id); 128 | visitor.subgraph_correlated_input_refs_unchecked(query_graph, node_id) 129 | } 130 | 131 | fn subgraph_correlated_input_refs_unchecked( 132 | &self, 133 | query_graph: &QueryGraph, 134 | node_id: NodeId, 135 | ) -> Rc>> { 136 | query_graph 137 | .property_cache 138 | .borrow_mut() 139 | .node_bottom_up_properties(node_id) 140 | .get(&Self::metadata_type_id()) 141 | .unwrap() 142 | .downcast_ref::>>>() 143 | .unwrap() 144 | .clone() 145 | } 146 | 147 | fn metadata_type_id() -> TypeId { 148 | TypeId::of::() 149 | } 150 | 151 | fn compute_property_for_node( 152 | &self, 153 | query_graph: &QueryGraph, 154 | node_id: NodeId, 155 | ) -> Rc>> { 156 | // The correlated input refs in the node itself... 157 | let mut correlated_cols: HashMap> = 158 | node_correlated_input_refs(query_graph, node_id) 159 | .as_ref() 160 | .clone(); 161 | // ... and the ones under its child subgraphs, ... 162 | let query_node = query_graph.node(node_id); 163 | for input in 0..query_node.num_inputs() { 164 | let input_correlated_cols = self 165 | .subgraph_correlated_input_refs_unchecked(query_graph, query_node.get_input(input)); 166 | merge_correlated_maps(input_correlated_cols.iter(), &mut correlated_cols); 167 | } 168 | //... but remove ones in the correlation scope the node defines. 169 | if let QueryNode::Apply { .. 
} = &query_node { 170 | correlated_cols = correlated_cols 171 | .into_iter() 172 | .filter(|(offset, _)| *offset > 0) 173 | .map(|(offset, columns)| (offset - 1, columns)) 174 | .collect(); 175 | } 176 | Rc::new(correlated_cols) 177 | } 178 | } 179 | 180 | impl QueryGraphPrePostVisitor for SubgraphCorrelatedInputRefs { 181 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 182 | if query_graph 183 | .property_cache 184 | .borrow_mut() 185 | .node_bottom_up_properties(node_id) 186 | .contains_key(&Self::metadata_type_id()) 187 | { 188 | PreOrderVisitationResult::DoNotVisitInputs 189 | } else { 190 | PreOrderVisitationResult::VisitInputs 191 | } 192 | } 193 | 194 | fn visit_post(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 195 | if !query_graph 196 | .property_cache 197 | .borrow_mut() 198 | .node_bottom_up_properties(node_id) 199 | .contains_key(&Self::metadata_type_id()) 200 | { 201 | let correlated_input_refs = self.compute_property_for_node(query_graph, node_id); 202 | query_graph 203 | .property_cache 204 | .borrow_mut() 205 | .node_bottom_up_properties(node_id) 206 | .insert(Self::metadata_type_id(), Box::new(correlated_input_refs)); 207 | } 208 | } 209 | } 210 | 211 | fn merge_correlated_maps<'a, I>(src: I, dst: &mut HashMap>) 212 | where 213 | I: Iterator)>, 214 | { 215 | for (context_offset, columns) in src { 216 | dst.entry(*context_offset) 217 | .or_insert_with(|| BTreeSet::new()) 218 | .extend(columns.iter()); 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /tests/testdata/explain/aggregate_project_transpose.test: -------------------------------------------------------------------------------- 1 | run 2 | aggregate_project_transpose_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 5 7 | - Row Type: string, string, string, string, string 8 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 9 | [3] Aggregate key: [ref_0, ref_1, ref_2], aggregates: [min(ref_4), max(ref_3)] 10 | - Num Columns: 5 11 | - Row Type: string, string, string, string, string 12 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 13 | [2] Project [ref_4, ref_3, ref_2, ref_1, ref_0] 14 | - Num Columns: 5 15 | - Row Type: string, string, string, string, string 16 | [1] TableScan id: 1 17 | - Num Columns: 5 18 | - Row Type: string, string, string, string, string 19 | 20 | 21 | Optimized: 22 | [0] QueryRoot 23 | - Num Columns: 5 24 | - Row Type: string, string, string, string, string 25 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 26 | [7] Project [ref_2, ref_1, ref_0, ref_3, ref_4] 27 | - Num Columns: 5 28 | - Row Type: string, string, string, string, string 29 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 30 | [6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)] 31 | - Num Columns: 5 32 | - Row Type: string, string, string, string, string 33 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 34 | [1] TableScan id: 1 35 | - Num Columns: 5 36 | - Row Type: string, string, string, string, string 37 | 38 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"3","label":"[3] Aggregate key: [ref_0, ref_1, ref_2], aggregates: [min(ref_4), max(ref_3)]","annotations":["Num Columns: 5","Row Type: string, 
string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"2","label":"[2] Project [ref_4, ref_3, ref_2, ref_1, ref_0]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 39 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"3","label":"[3] Aggregate key: [ref_0, ref_1, ref_2], aggregates: [min(ref_4), max(ref_3)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"2","label":"[2] Project [ref_4, ref_3, ref_2, ref_1, ref_0]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"4","label":"TopProjectionRule"}]} 40 | step AggregateProjectTransposeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"3","label":"[3] Aggregate key: [ref_0, ref_1, ref_2], aggregates: [min(ref_4), max(ref_3)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"2","label":"[2] Project [ref_4, ref_3, ref_2, ref_1, ref_0]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"7","label":"[7] Project [ref_2, ref_1, ref_0, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"6","label":"[6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"7","to":"6","label":"input 
0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"3","to":"7","label":"AggregateProjectTransposeRule"}]} 41 | step ProjectMergeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"7","label":"[7] Project [ref_2, ref_1, ref_0, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"6","label":"[6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"7","label":"input 0"},{"from":"7","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"4","to":"7","label":"ProjectMergeRule"}]} 42 | step RemovePassthroughProjectRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"7","label":"[7] Project [ref_2, ref_1, ref_0, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"6","label":"[6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"7","label":"input 0"},{"from":"7","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"5","to":"1","label":"RemovePassthroughProjectRule"}]} 43 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"7","label":"[7] Project [ref_2, ref_1, ref_0, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"6","label":"[6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"1","label":"[1] TableScan id: 
1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"7","label":"input 0"},{"from":"7","to":"6","label":"input 0"},{"from":"6","to":"1","label":"input 0"}]} 44 | ---- 45 | ---- 46 | -------------------------------------------------------------------------------- /tests/testdata/explain/expression_reduction.test: -------------------------------------------------------------------------------- 1 | run 2 | expression_reduction_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 10 7 | - Row Type: string, string, string, string, string, string, string, string, string, string 8 | - Pulled Up Predicates: lt(NULL, ref_1) 9 | [3] Filter [lt(NULL, ref_1)] 10 | - Num Columns: 10 11 | - Row Type: string, string, string, string, string, string, string, string, string, string 12 | - Pulled Up Predicates: lt(NULL, ref_1) 13 | [2] Left Outer Join [eq(NULL, ref_5)] 14 | - Num Columns: 10 15 | - Row Type: string, string, string, string, string, string, string, string, string, string 16 | [1] TableScan id: 1 17 | - Num Columns: 5 18 | - Row Type: string, string, string, string, string 19 | Recurring node 1 20 | 21 | 22 | Optimized: 23 | [0] QueryRoot 24 | - Num Columns: 10 25 | - Row Type: string, string, string, string, string, string, string, string, string, string 26 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 27 | [4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9] 28 | - Num Columns: 10 29 | - Row Type: string, string, string, string, string, string, string, string, string, string 30 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 31 | [5] Filter [NULL] 32 | - Num Columns: 10 33 | - Row Type: string, string, string, string, string, string, string, string, string, string 34 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 35 | [7] Inner Join [NULL] 36 | - Num Columns: 10 37 | - Row Type: string, string, string, string, string, string, string, string, string, string 38 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 39 | [1] TableScan id: 1 40 | - Num Columns: 5 41 | - Row Type: string, string, string, string, string 42 | Recurring node 1 43 | 44 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"3","label":"[3] Filter [lt(NULL, ref_1)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"2","label":"[2] Left Outer Join [eq(NULL, ref_5)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"}]} 45 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"3","label":"[3] Filter [lt(NULL, ref_1)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, 
ref_1)"]},{"id":"2","label":"[2] Left Outer Join [eq(NULL, ref_5)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"4","label":"TopProjectionRule"}]} 46 | step ExpressionReductionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"3","label":"[3] Filter [lt(NULL, ref_1)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"2","label":"[2] Left Outer Join [eq(NULL, ref_5)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"5","label":"[5] Filter [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"5","to":"2","label":"input 0"},{"from":"3","to":"5","label":"ExpressionReductionRule"}]} 47 | step ExpressionReductionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"5","label":"[5] Filter [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Left Outer Join [eq(NULL, ref_5)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"6","label":"[6] Left Outer Join [NULL]","annotations":["Num Columns: 10","Row 
Type: string, string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"6","to":"1","label":"input 0"},{"from":"6","to":"1","label":"input 1"},{"from":"2","to":"6","label":"ExpressionReductionRule"}]} 48 | step OuterToInnerJoinRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"5","label":"[5] Filter [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"6","label":"[6] Left Outer Join [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"7","label":"[7] Inner Join [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"5","label":"input 0"},{"from":"5","to":"6","label":"input 0"},{"from":"6","to":"1","label":"input 0"},{"from":"6","to":"1","label":"input 1"},{"from":"7","to":"1","label":"input 0"},{"from":"7","to":"1","label":"input 1"},{"from":"6","to":"7","label":"OuterToInnerJoinRule"}]} 49 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"5","label":"[5] Filter [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"7","label":"[7] Inner Join [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"5","label":"input 0"},{"from":"5","to":"7","label":"input 0"},{"from":"7","to":"1","label":"input 0"},{"from":"7","to":"1","label":"input 1"}]} 50 | ---- 51 | ---- 52 | --------------------------------------------------------------------------------
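Note on the correlated-references property in src/query_graph/properties/correlated_input_refs.rs above: the scoping step in compute_property_for_node is easy to miss when reading the flattened listing, so here is a minimal, self-contained sketch of the same idea. It assumes a simplified HashMap<usize, BTreeSet<usize>> map type and hypothetical helper names (merge, combine_for_apply); it only illustrates how child maps are merged and how an Apply node drops context offset 0 and shifts the remaining offsets down by one.

use std::collections::{BTreeSet, HashMap};

// Correlated references grouped by context offset: 0 is the innermost
// enclosing Apply, 1 the next one out, and so on.
type CorrelatedRefs = HashMap<usize, BTreeSet<usize>>;

// Merge `src` into `dst`, in the spirit of `merge_correlated_maps` above.
fn merge(src: &CorrelatedRefs, dst: &mut CorrelatedRefs) {
    for (offset, columns) in src {
        dst.entry(*offset).or_default().extend(columns.iter().copied());
    }
}

// Combine a node's own correlated references with those of its inputs, then
// peel off the scope an Apply node defines (offset 0) and shift the remaining
// offsets down by one, mirroring the filter/map step in the property code.
fn combine_for_apply(own: &CorrelatedRefs, inputs: &[CorrelatedRefs]) -> CorrelatedRefs {
    let mut all = own.clone();
    for input in inputs {
        merge(input, &mut all);
    }
    all.into_iter()
        .filter(|(offset, _)| *offset > 0)
        .map(|(offset, columns)| (offset - 1, columns))
        .collect()
}

fn main() {
    // A child subgraph referencing columns 1 and 3 of the nearest Apply (offset 0)
    // and column 2 of the next enclosing Apply (offset 1).
    let child: CorrelatedRefs = [(0, BTreeSet::from([1, 3])), (1, BTreeSet::from([2]))]
        .into_iter()
        .collect();
    let result = combine_for_apply(&CorrelatedRefs::new(), &[child]);
    // Offset 0 is resolved by this Apply; the offset-1 reference becomes offset 0.
    let expected: CorrelatedRefs = [(0, BTreeSet::from([2]))].into_iter().collect();
    assert_eq!(result, expected);
}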