├── .gitignore ├── .images └── query-plan1.png ├── src ├── lib.rs ├── visitor_utils.rs ├── data_type.rs ├── query_graph │ ├── optimizer │ │ └── rules │ │ │ ├── project_merge.rs │ │ │ ├── top_projection.rs │ │ │ ├── cte_discovery.rs │ │ │ ├── union_merge.rs │ │ │ ├── aggregate_remove.rs │ │ │ ├── filter_project_transpose.rs │ │ │ ├── project_normalization.rs │ │ │ ├── identity_join.rs │ │ │ ├── mod.rs │ │ │ ├── union_pruning.rs │ │ │ ├── filter_aggregate_transpose.rs │ │ │ ├── prune_aggregate_input.rs │ │ │ ├── aggregate_pruning.rs │ │ │ ├── expression_reduction.rs │ │ │ ├── filter_merge.rs │ │ │ ├── filter_normalization.rs │ │ │ ├── filter_apply_transpose.rs │ │ │ ├── remove_passthrough_project.rs │ │ │ ├── filter_join_transpose.rs │ │ │ ├── apply_pruning.rs │ │ │ ├── aggregate_project_transpose.rs │ │ │ ├── aggregate_simplifier.rs │ │ │ ├── join_pruning.rs │ │ │ ├── join_project_transpose.rs │ │ │ └── common_aggregate_discovery.rs │ ├── properties │ │ ├── equivalence_classes.rs │ │ ├── input_dependencies.rs │ │ ├── mod.rs │ │ ├── subqueries.rs │ │ ├── num_columns.rs │ │ ├── row_type.rs │ │ └── correlated_input_refs.rs │ ├── cloner.rs │ ├── json.rs │ └── explain.rs ├── bin │ └── dag.rs ├── value.rs └── scalar_expr │ ├── reduction.rs │ └── equivalence_class.rs ├── .github └── workflows │ └── rust.yml ├── Cargo.toml ├── README.md ├── tools ├── vis.html ├── cytoscape.html └── d3.html ├── tests └── testdata │ └── explain │ ├── project_normalization.test │ ├── filter_project_transpose.test │ ├── cte_discovery.test │ ├── keys_filter.test │ ├── union_pruning.test │ ├── union_merge.test │ ├── aggregate_project_transpose.test │ └── expression_reduction.test └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /.images/query-plan1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/asenac/rust-sql-playground/HEAD/.images/query-plan1.png -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate lazy_static; 3 | 4 | #[macro_use] 5 | extern crate serde_derive; 6 | 7 | pub mod data_type; 8 | pub mod query_graph; 9 | pub mod scalar_expr; 10 | pub mod value; 11 | pub mod visitor_utils; 12 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rust-sql" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [lib] 9 | name = "rust_sql" 10 | path = "src/lib.rs" 11 | 12 | [[bin]] 13 | name = "dag" 14 | 15 | [dependencies] 16 | datadriven = "0.7.0" 
17 | itertools = "0.11.0"
18 | lazy_static = "1.4.0"
19 | serde = "1.0.166"
20 | serde_derive = "1.0.166"
21 | serde_json = "1.0.99"
22 | 
--------------------------------------------------------------------------------
/src/visitor_utils.rs:
--------------------------------------------------------------------------------
 1 | pub enum PreOrderVisitationResult {
 2 |     VisitInputs,
 3 |     DoNotVisitInputs,
 4 |     Abort,
 5 | }
 6 | 
 7 | pub enum PostOrderVisitationResult {
 8 |     Continue,
 9 |     Abort,
10 | }
11 | 
12 | pub struct VisitationStep<V> {
13 |     pub node: V,
14 |     pub next_child: Option<usize>,
15 | }
16 | 
17 | impl<V> VisitationStep<V> {
18 |     pub fn new(node: V) -> Self {
19 |         Self {
20 |             node,
21 |             next_child: None,
22 |         }
23 |     }
24 | }
25 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # SQL query compiler playground
 2 | 
 3 | This is a SQL query compiler written in Rust mainly for learning and blogging purposes.
 4 | 
 5 | There is no SQL parser yet and its overall functionality is very limited, although the
 6 | logical optimizer is getting real.
 7 | 
 8 | ## Blog posts
 9 | 
10 | * [Part one, the query plan representation](https://andres.senac.es/posts/query-compiler-part-one/)
11 | * [Part two, the query rewrite driver](https://andres.senac.es/posts/query-compiler-part-two-rule-driver/)
12 | 
13 | ## Visualizing query plans
14 | 
15 | The `JsonSerializer` utility can be used to dump the query plan in a JSON format that can be
16 | rendered with any of the utilities in the `tools` folder, each using a different graph rendering
17 | library.
18 | 
19 | ![Query plan][query-plan-1]
20 | 
21 | [query-plan-1]: .images/query-plan1.png
--------------------------------------------------------------------------------
/src/data_type.rs:
--------------------------------------------------------------------------------
 1 | use core::fmt;
 2 | 
 3 | #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash, Debug)]
 4 | pub enum DataType {
 5 |     Bool,
 6 |     Int,
 7 |     BigInt,
 8 |     String,
 9 |     Unknown,
10 |     Any,
11 |     Array(Box<DataType>),
12 |     Tuple(Vec<DataType>),
13 | }
14 | 
15 | impl fmt::Display for DataType {
16 |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
17 |         match self {
18 |             DataType::Bool => write!(f, "bool"),
19 |             DataType::Int => write!(f, "int"),
20 |             DataType::BigInt => write!(f, "bigint"),
21 |             DataType::String => write!(f, "string"),
22 |             DataType::Unknown => write!(f, "unknown"),
23 |             DataType::Any => write!(f, "any"),
24 |             DataType::Array(elem_type) => write!(f, "array({})", elem_type),
25 |             DataType::Tuple(elem_types) => {
26 |                 write!(f, "tuple(")?;
27 |                 for (i, data_type) in elem_types.iter().enumerate() {
28 |                     if i > 0 {
29 |                         write!(f, ", ")?;
30 |                     }
31 |                     write!(f, "{}", data_type)?;
32 |                 }
33 |                 write!(f, ")")
34 |             }
35 |         }
36 |     }
37 | }
38 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/project_merge.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     query_graph::{
 3 |         optimizer::{OptRuleType, SingleReplacementRule},
 4 |         NodeId, QueryGraph, QueryNode,
 5 |     },
 6 |     scalar_expr::rewrite::dereference_scalar_expr,
 7 | };
 8 | 
 9 | pub struct ProjectMergeRule {}
10 | 
11 | impl SingleReplacementRule for ProjectMergeRule {
12 |     fn rule_type(&self) -> OptRuleType {
13 |         OptRuleType::TopDown
14 |     }
15 | 
16 |     fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option<NodeId> {
17 |         if let QueryNode::Project { input, outputs } = query_graph.node(node_id) {
18 |             if let QueryNode::Project {
19 |                 input: child_input,
20 |                 outputs: child_outputs,
21 |             } = query_graph.node(*input)
22 |             {
23 |                 return Some(
24 |                     query_graph.project(
25 |                         *child_input,
26 |                         outputs
27 |                             .clone()
28 |                             .into_iter()
29 |                             .map(|x| dereference_scalar_expr(&x, &child_outputs))
30 |                             .collect(),
31 |                     ),
32 |                 );
33 |             }
34 |         }
35 |         None
36 |     }
37 | }
38 | 
--------------------------------------------------------------------------------
/src/query_graph/properties/equivalence_classes.rs:
--------------------------------------------------------------------------------
 1 | use std::{any::TypeId, rc::Rc};
 2 | 
 3 | use crate::{
 4 |     query_graph::{NodeId, QueryGraph},
 5 |     scalar_expr::equivalence_class::{extract_equivalence_classes, EquivalenceClasses},
 6 | };
 7 | 
 8 | use super::pulled_up_predicates;
 9 | 
10 | /// Property derived from the pulled up predicates.
11 | pub fn equivalence_classes(query_graph: &QueryGraph, node_id: NodeId) -> Rc<EquivalenceClasses> {
12 |     let type_id = TypeId::of::<Rc<EquivalenceClasses>>();
13 |     if let Some(cached) = query_graph
14 |         .property_cache
15 |         .borrow_mut()
16 |         .node_bottom_up_properties(node_id)
17 |         .get(&type_id)
18 |     {
19 |         return cached
20 |             .downcast_ref::<Rc<EquivalenceClasses>>()
21 |             .unwrap()
22 |             .clone();
23 |     }
24 |     // Do not use an else branch since we need to release the borrow above
25 |     // in order to compute the pulled up predicates
26 |     let predicates = pulled_up_predicates(query_graph, node_id);
27 |     let classes = Rc::new(extract_equivalence_classes(&predicates));
28 |     query_graph
29 |         .property_cache
30 |         .borrow_mut()
31 |         .node_bottom_up_properties(node_id)
32 |         .insert(type_id, Box::new(classes.clone()));
33 |     classes
34 | }
35 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/top_projection.rs:
--------------------------------------------------------------------------------
 1 | use itertools::Itertools;
 2 | 
 3 | use crate::{
 4 |     query_graph::{
 5 |         optimizer::{OptRuleType, SingleReplacementRule},
 6 |         properties::num_columns,
 7 |         NodeId, QueryGraph, QueryNode,
 8 |     },
 9 |     scalar_expr::ScalarExpr,
10 | };
11 | 
12 | /// Rule that ensures the root node of the query is a projection.
13 | ///
14 | /// Adding a projection as the top level node helps with column pruning
15 | /// as columns that are bound to other columns or constants can be pruned.
16 | pub struct TopProjectionRule {}
17 | 
18 | impl SingleReplacementRule for TopProjectionRule {
19 |     fn rule_type(&self) -> OptRuleType {
20 |         OptRuleType::RootOnly
21 |     }
22 | 
23 |     fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option<NodeId> {
24 |         if let QueryNode::Project { .. } = query_graph.node(node_id) {
25 |             None
26 |         } else {
27 |             let num_columns = num_columns(query_graph, node_id);
28 |             Some(
29 |                 query_graph.project(
30 |                     node_id,
31 |                     (0..num_columns)
32 |                         .map(|i| ScalarExpr::input_ref(i).into())
33 |                         .collect_vec(),
34 |                 ),
35 |             )
36 |         }
37 |     }
38 | }
39 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/cte_discovery.rs:
--------------------------------------------------------------------------------
 1 | use itertools::Itertools;
 2 | 
 3 | use crate::query_graph::{
 4 |     optimizer::{OptRuleType, Rule},
 5 |     NodeId, QueryGraph,
 6 | };
 7 | 
 8 | /// Finds duplicated nodes in the query graph and replaces them with the equivalent node
 9 | /// with the lowest node ID.
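///
/// A hypothetical sketch of the situation this rule targets, built with the
/// graph construction helpers used elsewhere in this crate (and assuming the
/// builder does not already deduplicate identical nodes on insertion):
///
/// ```ignore
/// use rust_sql::query_graph::*;
/// use rust_sql::scalar_expr::*;
///
/// let mut query_graph = QueryGraph::new();
/// let scan = query_graph.table_scan(1, 10);
/// let predicate: ScalarExprRef = ScalarExpr::input_ref(0)
///     .binary(
///         BinaryOp::Eq,
///         ScalarExpr::string_literal("hello".to_string()).into(),
///     )
///     .into();
/// // Two structurally identical filters, e.g. built independently by two
/// // branches of the same query.
/// let filter_a = query_graph.filter(scan, vec![predicate.clone()]);
/// let filter_b = query_graph.filter(scan, vec![predicate.clone()]);
/// // CteDiscoveryRule would report the replacement (filter_b, filter_a),
/// // making every parent of the duplicate point at the node with the lowest
/// // id, so the filter subplan becomes shared.
/// ```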
10 | pub struct CteDiscoveryRule {} 11 | 12 | impl Rule for CteDiscoveryRule { 13 | fn rule_type(&self) -> OptRuleType { 14 | OptRuleType::RootOnly 15 | } 16 | 17 | fn apply(&self, query_graph: &mut QueryGraph, _: NodeId) -> Option> { 18 | let mut node_ids = query_graph.nodes.keys().cloned().collect_vec(); 19 | node_ids.sort(); 20 | let replacements = node_ids 21 | .iter() 22 | .enumerate() 23 | .filter_map(|(i, orig_node_id)| { 24 | let node = query_graph.node(*orig_node_id); 25 | node_ids 26 | .iter() 27 | .take(i) 28 | .find(|replacement_node_id| query_graph.node(**replacement_node_id) == node) 29 | .map(|replacement_node_id| (*orig_node_id, *replacement_node_id)) 30 | }) 31 | .collect_vec(); 32 | if replacements.is_empty() { 33 | None 34 | } else { 35 | Some(replacements) 36 | } 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/union_merge.rs: -------------------------------------------------------------------------------- 1 | use crate::query_graph::{ 2 | optimizer::{OptRuleType, SingleReplacementRule}, 3 | NodeId, QueryGraph, QueryNode, 4 | }; 5 | 6 | pub struct UnionMergeRule {} 7 | 8 | impl SingleReplacementRule for UnionMergeRule { 9 | fn rule_type(&self) -> OptRuleType { 10 | OptRuleType::TopDown 11 | } 12 | 13 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 14 | if let QueryNode::Union { inputs } = query_graph.node(node_id) { 15 | let mut new_inputs = inputs.clone(); 16 | let mut any = false; 17 | while let Some((idx, inputs)) = 18 | new_inputs.iter().enumerate().find_map(|(idx, input)| { 19 | if let QueryNode::Union { inputs } = query_graph.node(*input) { 20 | Some((idx, inputs.clone())) 21 | } else { 22 | None 23 | } 24 | }) 25 | { 26 | let mut flattened_union = (0..idx) 27 | .map(|i| new_inputs[i].clone()) 28 | .collect::>(); 29 | flattened_union.extend(inputs); 30 | flattened_union.extend((idx + 1..new_inputs.len()).map(|i| new_inputs[i].clone())); 31 | new_inputs = flattened_union; 32 | any = true; 33 | } 34 | if any { 35 | return Some(query_graph.add_node(QueryNode::Union { inputs: new_inputs })); 36 | } 37 | } 38 | None 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/aggregate_remove.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | query_graph::{ 3 | optimizer::{OptRuleType, SingleReplacementRule}, 4 | properties::unique_key, 5 | NodeId, QueryGraph, QueryNode, 6 | }, 7 | scalar_expr::ScalarExpr, 8 | }; 9 | 10 | pub struct AggregateRemoveRule {} 11 | 12 | impl SingleReplacementRule for AggregateRemoveRule { 13 | fn rule_type(&self) -> OptRuleType { 14 | OptRuleType::Always 15 | } 16 | 17 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 18 | if let QueryNode::Aggregate { 19 | group_key, 20 | aggregates, 21 | input, 22 | } = query_graph.node(node_id) 23 | { 24 | if !group_key.is_empty() { 25 | if let Some(input_unique_key) = unique_key(query_graph, *input) { 26 | let group_key_expr = group_key 27 | .iter() 28 | .map(|col| ScalarExpr::input_ref(*col).into()) 29 | .collect::>(); 30 | if input_unique_key.iter().all(|e| group_key_expr.contains(e)) { 31 | let mut values = group_key_expr; 32 | values.extend( 33 | aggregates 34 | .iter() 35 | .map(|aggregate| aggregate.on_unique_tuple()), 36 | ); 37 | return Some(query_graph.project(*input, values)); 38 | } 39 | } 40 | } 41 | } 42 | None 43 | } 44 | } 45 
|
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/filter_project_transpose.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     query_graph::{
 3 |         optimizer::{OptRuleType, SingleReplacementRule},
 4 |         NodeId, QueryGraph, QueryNode,
 5 |     },
 6 |     scalar_expr::rewrite::dereference_scalar_expr,
 7 | };
 8 | 
 9 | /// Given a Filter node on top of a Project node, it transposes them by creating a
10 | /// new Filter node and a new Project node on top of it.
11 | ///
12 | /// If the Project node is a shared node, i.e. it has multiple parents, the original
13 | /// Project node will still be referenced by the rest of its parents. In our model,
14 | /// we are only interested in preserving shared Joins, Aggregates and any node
15 | /// performing some expensive operation.
16 | pub struct FilterProjectTransposeRule {}
17 | 
18 | impl SingleReplacementRule for FilterProjectTransposeRule {
19 |     fn rule_type(&self) -> OptRuleType {
20 |         OptRuleType::TopDown
21 |     }
22 | 
23 |     fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option<NodeId> {
24 |         if let QueryNode::Filter { conditions, input } = query_graph.node(node_id) {
25 |             if let QueryNode::Project {
26 |                 outputs,
27 |                 input: proj_input,
28 |             } = query_graph.node(*input)
29 |             {
30 |                 let new_conditions = conditions
31 |                     .iter()
32 |                     .map(|c| dereference_scalar_expr(c, outputs))
33 |                     .collect::<Vec<_>>();
34 |                 let outputs = outputs.clone();
35 |                 let new_filter = query_graph.filter(*proj_input, new_conditions);
36 |                 return Some(query_graph.project(new_filter, outputs));
37 |             }
38 |         }
39 |         None
40 |     }
41 | }
42 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/project_normalization.rs:
--------------------------------------------------------------------------------
 1 | use crate::{
 2 |     query_graph::{
 3 |         optimizer::{OptRuleType, SingleReplacementRule},
 4 |         properties::{equivalence_classes, pulled_up_predicates},
 5 |         NodeId, QueryGraph, QueryNode,
 6 |     },
 7 |     scalar_expr::{
 8 |         equivalence_class::to_replacement_map, rewrite::replace_sub_expressions_pre, ScalarExpr,
 9 |         ScalarExprRef,
10 |     },
11 | };
12 | 
13 | /// Replace sub-expressions in a projection with the representative of the equivalence
14 | /// class they belong to.
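///
/// For instance, if a filter below the projection enforces `ref_0 = 'hello'`,
/// the literal becomes the representative of that equivalence class and any use
/// of `ref_0` in the projection is rewritten to `'hello'`. A hypothetical
/// sketch using the graph construction helpers from this crate:
///
/// ```ignore
/// use rust_sql::query_graph::*;
/// use rust_sql::scalar_expr::*;
///
/// let mut query_graph = QueryGraph::new();
/// let scan = query_graph.table_scan(1, 2);
/// let filter = query_graph.filter(
///     scan,
///     vec![ScalarExpr::input_ref(0)
///         .binary(
///             BinaryOp::Eq,
///             ScalarExpr::string_literal("hello".to_string()).into(),
///         )
///         .into()],
/// );
/// let project = query_graph.project(
///     filter,
///     vec![ScalarExpr::nary(
///         NaryOp::Concat,
///         vec![
///             ScalarExpr::input_ref(0).into(),
///             ScalarExpr::input_ref(1).into(),
///         ],
///     )
///     .into()],
/// );
/// // Applying ProjectNormalizationRule to `project` is expected to return a
/// // projection computing concat('hello', ref_1) instead.
/// ```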
15 | pub struct ProjectNormalizationRule {} 16 | 17 | impl SingleReplacementRule for ProjectNormalizationRule { 18 | fn rule_type(&self) -> OptRuleType { 19 | OptRuleType::Always 20 | } 21 | 22 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 23 | if let QueryNode::Project { outputs, input } = query_graph.node(node_id) { 24 | let classes = equivalence_classes(query_graph, *input); 25 | let predicates = pulled_up_predicates(query_graph, *input); 26 | let mut replacement_map = to_replacement_map(&classes); 27 | let true_literal: ScalarExprRef = ScalarExpr::true_literal().into(); 28 | replacement_map.extend( 29 | predicates 30 | .iter() 31 | .map(|predicate| (predicate.clone(), true_literal.clone())), 32 | ); 33 | let new_outputs = outputs 34 | .iter() 35 | .map(|expr| replace_sub_expressions_pre(expr, &replacement_map)) 36 | .collect::>(); 37 | 38 | if new_outputs.iter().zip(outputs.iter()).any(|(x, y)| x != y) { 39 | return Some(query_graph.project(*input, new_outputs)); 40 | } 41 | } 42 | None 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/identity_join.rs: -------------------------------------------------------------------------------- 1 | use crate::query_graph::{ 2 | optimizer::{OptRuleType, SingleReplacementRule}, 3 | properties::{empty_key, num_columns}, 4 | JoinType, NodeId, QueryGraph, QueryNode, 5 | }; 6 | 7 | /// Removes joins where one of the inputs is a relation always projecting a single 8 | /// row and has no columns. 9 | pub struct IdentityJoinRule; 10 | 11 | impl SingleReplacementRule for IdentityJoinRule { 12 | fn rule_type(&self) -> OptRuleType { 13 | OptRuleType::Always 14 | } 15 | 16 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 17 | if let QueryNode::Join { 18 | join_type: JoinType::Inner, 19 | conditions, 20 | left, 21 | right, 22 | } = query_graph.node(node_id) 23 | { 24 | let left_num_columns = num_columns(query_graph, *left); 25 | let right_num_columns = num_columns(query_graph, *right); 26 | let left_is_identity = left_num_columns == 0 27 | && empty_key(query_graph, *left) 28 | .and_then(|key| Some(key.lower_bound == 1 && key.upper_bound == Some(1))) 29 | .unwrap_or(false); 30 | let right_is_identity = right_num_columns == 0 31 | && empty_key(query_graph, *right) 32 | .and_then(|key| Some(key.lower_bound == 1 && key.upper_bound == Some(1))) 33 | .unwrap_or(false); 34 | let non_identity_relation = match (left_is_identity, right_is_identity) { 35 | (true, _) => Some(*right), 36 | (_, true) => Some(*left), 37 | _ => None, 38 | }; 39 | if let Some(non_identity_relation) = non_identity_relation { 40 | return Some(query_graph.filter(non_identity_relation, conditions.clone())); 41 | } 42 | } 43 | None 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/mod.rs: -------------------------------------------------------------------------------- 1 | mod aggregate_project_transpose; 2 | mod aggregate_pruning; 3 | mod aggregate_remove; 4 | mod aggregate_simplifier; 5 | mod apply_pruning; 6 | mod common_aggregate_discovery; 7 | mod cte_discovery; 8 | mod equality_propagation; 9 | mod expression_reduction; 10 | mod filter_aggregate_transpose; 11 | mod filter_apply_transpose; 12 | mod filter_join_transpose; 13 | mod filter_merge; 14 | mod filter_normalization; 15 | mod filter_project_transpose; 16 | mod identity_join; 17 | mod join_project_transpose; 18 | mod 
join_pruning; 19 | mod outer_to_inner_join; 20 | mod project_merge; 21 | mod project_normalization; 22 | mod prune_aggregate_input; 23 | mod remove_passthrough_project; 24 | mod top_projection; 25 | mod union_merge; 26 | mod union_pruning; 27 | 28 | pub use aggregate_project_transpose::AggregateProjectTransposeRule; 29 | pub use aggregate_pruning::AggregatePruningRule; 30 | pub use aggregate_remove::AggregateRemoveRule; 31 | pub use aggregate_simplifier::AggregateSimplifierRule; 32 | pub use apply_pruning::ApplyPruningRule; 33 | pub use common_aggregate_discovery::CommonAggregateDiscoveryRule; 34 | pub use cte_discovery::CteDiscoveryRule; 35 | pub use equality_propagation::EqualityPropagationRule; 36 | pub use expression_reduction::ExpressionReductionRule; 37 | pub use filter_aggregate_transpose::FilterAggregateTransposeRule; 38 | pub use filter_apply_transpose::FilterApplyTransposeRule; 39 | pub use filter_join_transpose::FilterJoinTransposeRule; 40 | pub use filter_merge::FilterMergeRule; 41 | pub use filter_normalization::FilterNormalizationRule; 42 | pub use filter_project_transpose::FilterProjectTransposeRule; 43 | pub use identity_join::IdentityJoinRule; 44 | pub use join_project_transpose::JoinProjectTransposeRule; 45 | pub use join_pruning::JoinPruningRule; 46 | pub use outer_to_inner_join::OuterToInnerJoinRule; 47 | pub use project_merge::ProjectMergeRule; 48 | pub use project_normalization::ProjectNormalizationRule; 49 | pub use prune_aggregate_input::PruneAggregateInputRule; 50 | pub use remove_passthrough_project::RemovePassthroughProjectRule; 51 | pub use top_projection::TopProjectionRule; 52 | pub use union_merge::UnionMergeRule; 53 | pub use union_pruning::UnionPruningRule; 54 | -------------------------------------------------------------------------------- /src/query_graph/properties/input_dependencies.rs: -------------------------------------------------------------------------------- 1 | use std::{any::TypeId, collections::HashSet, rc::Rc}; 2 | 3 | use crate::{ 4 | query_graph::{NodeId, QueryGraph, QueryNode}, 5 | scalar_expr::visitor::store_input_dependencies, 6 | }; 7 | 8 | use super::num_columns; 9 | 10 | struct InputDependenciesTag; 11 | 12 | pub fn input_dependencies(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 13 | let type_id = TypeId::of::(); 14 | if let Some(cached) = query_graph 15 | .property_cache 16 | .borrow_mut() 17 | .single_node_properties(node_id) 18 | .get(&type_id) 19 | { 20 | return cached.downcast_ref::>>().unwrap().clone(); 21 | } 22 | let mut dependencies = HashSet::new(); 23 | match query_graph.node(node_id) { 24 | QueryNode::QueryRoot { input } => { 25 | if let Some(input) = input { 26 | dependencies.extend(0..num_columns(query_graph, *input)); 27 | } 28 | } 29 | QueryNode::Project { outputs: exprs, .. } 30 | | QueryNode::Join { 31 | conditions: exprs, .. 32 | } => exprs 33 | .iter() 34 | .for_each(|e| store_input_dependencies(e, &mut dependencies)), 35 | QueryNode::TableScan { .. } => {} 36 | QueryNode::Aggregate { 37 | group_key, 38 | aggregates, 39 | .. 40 | } => { 41 | dependencies.extend(group_key.iter()); 42 | for aggregate in aggregates.iter() { 43 | dependencies.extend(aggregate.operands.iter()); 44 | } 45 | } 46 | QueryNode::Filter { 47 | conditions: exprs, .. 48 | } => exprs 49 | .iter() 50 | .for_each(|e| store_input_dependencies(e, &mut dependencies)), 51 | QueryNode::Union { .. } | QueryNode::SubqueryRoot { .. } | QueryNode::Apply { .. 
} => { 52 | dependencies.extend(0..num_columns(query_graph, node_id)) 53 | } 54 | } 55 | let dependencies = Rc::new(dependencies); 56 | query_graph 57 | .property_cache 58 | .borrow_mut() 59 | .single_node_properties(node_id) 60 | .insert(type_id, Box::new(dependencies.clone())); 61 | dependencies 62 | } 63 | -------------------------------------------------------------------------------- /tools/vis.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 17 | 18 | 19 |
20 | 21 | 22 | 73 | 74 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/union_pruning.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{ 6 | utils::{ 7 | apply_map_to_parents_and_replace_input, required_columns_from_parents, 8 | required_columns_to_column_map, 9 | }, 10 | OptRuleType, Rule, 11 | }, 12 | NodeId, QueryGraph, QueryNode, 13 | }, 14 | scalar_expr::ScalarExpr, 15 | }; 16 | 17 | /// Rule that given a shared union where all its parents are pruning projections, computes 18 | /// the superset of columns required by all its parents, and prunes the columns not used 19 | /// by any of them, replacing the parents of the union with projections over the pruned 20 | /// union. A pruning projection is inserted under each branch of the pruned union. 21 | pub struct UnionPruningRule {} 22 | 23 | impl Rule for UnionPruningRule { 24 | fn rule_type(&self) -> OptRuleType { 25 | OptRuleType::TopDown 26 | } 27 | 28 | fn apply( 29 | &self, 30 | query_graph: &mut QueryGraph, 31 | node_id: NodeId, 32 | ) -> Option> { 33 | if let QueryNode::Union { inputs } = query_graph.node(node_id) { 34 | if let Some(required_columns) = required_columns_from_parents(query_graph, node_id) { 35 | // Prune the branches 36 | let column_map = required_columns_to_column_map(&required_columns); 37 | let proj = column_map 38 | .iter() 39 | .map(|(i, _)| *i) 40 | .sorted() 41 | .map(|i| ScalarExpr::InputRef { index: i }.into()) 42 | .collect::>(); 43 | let new_inputs = inputs 44 | .clone() // clone to make the borrow checker happy 45 | .iter() 46 | .map(|input| query_graph.project(*input, proj.clone())) 47 | .collect(); 48 | let new_union = query_graph.add_node(QueryNode::Union { inputs: new_inputs }); 49 | 50 | // Rewrite the parent projections 51 | return Some(apply_map_to_parents_and_replace_input( 52 | query_graph, 53 | node_id, 54 | &column_map, 55 | new_union, 56 | )); 57 | } 58 | } 59 | None 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/bin/dag.rs: -------------------------------------------------------------------------------- 1 | use rust_sql::query_graph::optimizer::OptimizerContext; 2 | use rust_sql::query_graph::optimizer::DEFAULT_OPTIMIZER; 3 | use rust_sql::query_graph::*; 4 | use rust_sql::scalar_expr::*; 5 | 6 | fn main() { 7 | let mut query_graph = { 8 | let mut query_graph = QueryGraph::new(); 9 | // select col0, col2 from (select col0, col9, col2 || col4 from (select * from table_1 where col0 = 'hello') where col5 = 'world') 10 | let table_scan_1 = query_graph.table_scan(1, 10); 11 | let filter_1 = query_graph.filter( 12 | table_scan_1, 13 | vec![ScalarExpr::input_ref(0) 14 | .binary( 15 | BinaryOp::Eq, 16 | ScalarExpr::string_literal("hello".to_string()).into(), 17 | ) 18 | .into()], 19 | ); 20 | let filter_2 = query_graph.filter( 21 | filter_1, 22 | vec![ScalarExpr::input_ref(5) 23 | .binary( 24 | BinaryOp::Eq, 25 | ScalarExpr::string_literal("world".to_string()).into(), 26 | ) 27 | .into()], 28 | ); 29 | let project_1 = query_graph.project( 30 | filter_2, 31 | vec![ 32 | ScalarExpr::input_ref(0).into(), 33 | ScalarExpr::input_ref(9).into(), 34 | ScalarExpr::nary( 35 | NaryOp::Concat, 36 | vec![ 37 | ScalarExpr::input_ref(2).into(), 38 | ScalarExpr::input_ref(4).into(), 39 | ], 40 | ) 41 | .into(), 42 | ], 43 | ); 44 | let project_2 = query_graph.project( 45 | 
project_1, 46 | vec![ 47 | ScalarExpr::input_ref(0).into(), 48 | ScalarExpr::input_ref(2).into(), 49 | ], 50 | ); 51 | query_graph.set_entry_node(project_2); 52 | query_graph 53 | }; 54 | 55 | let optimizer = &DEFAULT_OPTIMIZER; 56 | 57 | println!("Before:\n\n{}", query_graph.fully_annotated_explain()); 58 | 59 | println!("Before:\n\n{}", query_graph.explain()); 60 | let mut opt_context = OptimizerContext::new(); 61 | optimizer.optimize(&mut opt_context, &mut query_graph); 62 | println!("After:\n\n{}", query_graph.explain()); 63 | 64 | query_graph.garbage_collect(); 65 | println!("After:\n\n{}", query_graph.explain()); 66 | } 67 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_aggregate_transpose.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{utils::common_parent_filters, OptRuleType, SingleReplacementRule}, 6 | properties::pulled_up_predicates, 7 | NodeId, QueryGraph, QueryNode, 8 | }, 9 | scalar_expr::rewrite::{apply_column_map, to_column_map_for_expr_push_down}, 10 | }; 11 | 12 | pub struct FilterAggregateTransposeRule {} 13 | 14 | impl SingleReplacementRule for FilterAggregateTransposeRule { 15 | fn rule_type(&self) -> OptRuleType { 16 | OptRuleType::TopDown 17 | } 18 | 19 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 20 | if let QueryNode::Aggregate { 21 | group_key, 22 | aggregates, 23 | input: agg_input, 24 | } = query_graph.node(node_id) 25 | { 26 | if let Some(conditions) = common_parent_filters(query_graph, node_id) { 27 | let column_map = to_column_map_for_expr_push_down(group_key); 28 | let known_predicates = pulled_up_predicates(query_graph, *agg_input); 29 | let pushable_conditions = conditions 30 | .iter() 31 | .enumerate() 32 | .filter_map(|(i, expr)| { 33 | if let Some(condition) = apply_column_map(expr, &column_map) { 34 | if !known_predicates.contains(&condition) { 35 | return Some((i, condition)); 36 | } 37 | } 38 | None 39 | }) 40 | .collect::>(); 41 | 42 | if !pushable_conditions.is_empty() { 43 | let new_group_key = group_key.clone(); 44 | let new_aggregates = aggregates.clone(); 45 | let new_filter = query_graph.filter( 46 | *agg_input, 47 | pushable_conditions 48 | .iter() 49 | .map(|(_, expr)| expr.clone()) 50 | .collect::>(), 51 | ); 52 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 53 | group_key: new_group_key, 54 | aggregates: new_aggregates, 55 | input: new_filter, 56 | }); 57 | 58 | return Some(new_aggregate); 59 | } 60 | } 61 | } 62 | None 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/prune_aggregate_input.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeSet, HashMap}; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{OptRuleType, SingleReplacementRule}, 8 | properties::{input_dependencies, num_columns}, 9 | NodeId, QueryGraph, QueryNode, 10 | }, 11 | scalar_expr::{AggregateExpr, ScalarExpr}, 12 | }; 13 | 14 | /// Given an aggregate node not using all the columns from its input, it inserts 15 | /// a pruning projection and replaces it with a new aggregation over the pruning 16 | /// projection. 
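///
/// A hypothetical sketch (assuming, as in the other rules in this module, that
/// the grouping key is a collection of input column indexes):
///
/// ```ignore
/// use rust_sql::query_graph::*;
///
/// let mut query_graph = QueryGraph::new();
/// let scan = query_graph.table_scan(1, 10);
/// // Group by column 9 of a 10-column input: only one column is actually needed.
/// let aggregate = query_graph.add_node(QueryNode::Aggregate {
///     group_key: (9..10).collect(),
///     aggregates: vec![],
///     input: scan,
/// });
/// // PruneAggregateInputRule is expected to rewrite this as an aggregate with
/// // group_key [0] over project(scan, [ref_9]).
/// ```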
17 | pub struct PruneAggregateInputRule {} 18 | 19 | impl SingleReplacementRule for PruneAggregateInputRule { 20 | fn rule_type(&self) -> OptRuleType { 21 | OptRuleType::TopDown 22 | } 23 | 24 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 25 | if let QueryNode::Aggregate { 26 | group_key, 27 | aggregates, 28 | input, 29 | } = query_graph.node(node_id) 30 | { 31 | let num_columns = num_columns(query_graph, *input); 32 | let input_dependencies = input_dependencies(query_graph, node_id); 33 | if num_columns != input_dependencies.len() { 34 | let column_map = input_dependencies 35 | .iter() 36 | .sorted() 37 | .enumerate() 38 | .map(|(i, j)| (*j, i)) 39 | .collect::>(); 40 | let new_group_key = group_key 41 | .iter() 42 | .map(|k| *column_map.get(k).unwrap()) 43 | .collect::>(); 44 | let new_aggregates = aggregates 45 | .iter() 46 | .map(|k| { 47 | AggregateExpr { 48 | op: k.op.clone(), 49 | operands: k 50 | .operands 51 | .iter() 52 | .map(|e| *column_map.get(e).unwrap()) 53 | .collect_vec(), 54 | } 55 | .into() 56 | }) 57 | .collect_vec(); 58 | let project_outputs = input_dependencies 59 | .iter() 60 | .sorted() 61 | .map(|i| ScalarExpr::input_ref(*i).into()) 62 | .collect(); 63 | 64 | let pruning_project = query_graph.project(*input, project_outputs); 65 | return Some(query_graph.add_node(QueryNode::Aggregate { 66 | group_key: new_group_key, 67 | aggregates: new_aggregates, 68 | input: pruning_project, 69 | })); 70 | } 71 | } 72 | None 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/aggregate_pruning.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::query_graph::{ 4 | optimizer::{ 5 | utils::{ 6 | apply_map_to_parents_and_replace_input, required_columns_from_parents, 7 | required_columns_to_column_map, 8 | }, 9 | OptRuleType, Rule, 10 | }, 11 | properties::num_columns, 12 | NodeId, QueryGraph, QueryNode, 13 | }; 14 | 15 | /// Rule that given a shared aggregate where all its parents are pruning projections, computes 16 | /// the superset of columns required by all its parents, and prunes the columns not used 17 | /// by any of them, replacing the parents of the aggregate with projections over the pruned 18 | /// aggregate. 19 | /// Only aggregate expressions can be pruned. 
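///
/// A hypothetical before/after sketch (plan notation only, not real syntax):
///
/// ```ignore
/// // Before: output column 2 (the second aggregate) is not referenced by any
/// // parent projection of the shared aggregate.
/// //   project_a(agg, [ref_0])    project_b(agg, [ref_1])
/// //   agg = Aggregate { group_key: [0], aggregates: [agg_x, agg_y], input }
/// //
/// // After applying AggregatePruningRule:
/// //   project_a(agg', [ref_0])   project_b(agg', [ref_1])
/// //   agg' = Aggregate { group_key: [0], aggregates: [agg_x], input }
/// ```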
20 | pub struct AggregatePruningRule {} 21 | 22 | impl Rule for AggregatePruningRule { 23 | fn rule_type(&self) -> OptRuleType { 24 | OptRuleType::TopDown 25 | } 26 | 27 | fn apply( 28 | &self, 29 | query_graph: &mut QueryGraph, 30 | node_id: NodeId, 31 | ) -> Option> { 32 | if let QueryNode::Aggregate { 33 | group_key, 34 | aggregates, 35 | input, 36 | } = query_graph.node(node_id) 37 | { 38 | if let Some(mut required_columns) = required_columns_from_parents(query_graph, node_id) 39 | { 40 | // All the columns from the grouping key are implicitly required 41 | required_columns.extend(0..group_key.len()); 42 | let num_columns = num_columns(query_graph, node_id); 43 | if required_columns.len() == num_columns { 44 | return None; 45 | } 46 | let new_group_key = group_key.clone(); 47 | let new_aggregates = aggregates 48 | .iter() 49 | .enumerate() 50 | .filter(|(i, _)| { 51 | let col_offset = group_key.len() + i; 52 | required_columns.contains(&col_offset) 53 | }) 54 | .map(|(_, e)| e.clone()) 55 | .collect_vec(); 56 | assert_ne!(new_aggregates.len(), aggregates.len()); 57 | let new_input = *input; 58 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 59 | group_key: new_group_key, 60 | aggregates: new_aggregates, 61 | input: new_input, 62 | }); 63 | 64 | // Rewrite the parent projections 65 | let column_map = required_columns_to_column_map(&required_columns); 66 | return Some(apply_map_to_parents_and_replace_input( 67 | query_graph, 68 | node_id, 69 | &column_map, 70 | new_aggregate, 71 | )); 72 | } 73 | } 74 | None 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/expression_reduction.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{OptRuleType, SingleReplacementRule}, 6 | properties::row_type, 7 | NodeId, QueryGraph, QueryNode, 8 | }, 9 | scalar_expr::reduction::reduce_and_prune_exists_subplans_recursively, 10 | }; 11 | 12 | pub struct ExpressionReductionRule; 13 | 14 | impl SingleReplacementRule for ExpressionReductionRule { 15 | fn rule_type(&self) -> OptRuleType { 16 | OptRuleType::Always 17 | } 18 | 19 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 20 | let new_node = match query_graph.node(node_id) { 21 | QueryNode::Project { outputs, input } => { 22 | let row_type = row_type(query_graph, *input); 23 | let input = *input; 24 | let mut outputs = outputs.clone(); 25 | outputs.iter_mut().for_each(|e| { 26 | *e = reduce_and_prune_exists_subplans_recursively(e, query_graph, &row_type) 27 | }); 28 | query_graph.project(input, outputs) 29 | } 30 | QueryNode::Filter { conditions, input } => { 31 | let row_type = row_type(query_graph, *input); 32 | let input = *input; 33 | let mut conditions = conditions.clone(); 34 | conditions.iter_mut().for_each(|e| { 35 | *e = reduce_and_prune_exists_subplans_recursively(e, query_graph, &row_type) 36 | }); 37 | query_graph.filter(input, conditions) 38 | } 39 | QueryNode::Join { 40 | join_type, 41 | conditions, 42 | left, 43 | right, 44 | } => { 45 | let left_row_type = row_type(query_graph, *left); 46 | let right_row_type = row_type(query_graph, *right); 47 | let row_type = left_row_type 48 | .iter() 49 | .chain(right_row_type.iter()) 50 | .cloned() 51 | .collect_vec(); 52 | let left = *left; 53 | let right = *right; 54 | let join_type = join_type.clone(); 55 | let mut conditions = conditions.clone(); 56 | 
conditions.iter_mut().for_each(|e| { 57 | *e = reduce_and_prune_exists_subplans_recursively(e, query_graph, &row_type) 58 | }); 59 | query_graph.add_node(QueryNode::Join { 60 | join_type, 61 | conditions, 62 | left, 63 | right, 64 | }) 65 | } 66 | _ => node_id, 67 | }; 68 | // Note: the graph may contain duplicated nodes as a result of input 69 | // replacements. 70 | if new_node != node_id && query_graph.node(new_node) != query_graph.node(node_id) { 71 | Some(new_node) 72 | } else { 73 | None 74 | } 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_merge.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::query_graph::{ 4 | optimizer::{OptRuleType, SingleReplacementRule}, 5 | NodeId, QueryGraph, QueryNode, 6 | }; 7 | 8 | /// Optimization rule that fuses two chained Filter nodes, concatenating the filter expressions 9 | /// they contain. 10 | pub struct FilterMergeRule {} 11 | 12 | impl SingleReplacementRule for FilterMergeRule { 13 | fn rule_type(&self) -> OptRuleType { 14 | OptRuleType::TopDown 15 | } 16 | 17 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 18 | if let QueryNode::Filter { conditions, input } = query_graph.node(node_id) { 19 | if let QueryNode::Filter { 20 | conditions: child_conditions, 21 | input: child_input, 22 | } = query_graph.node(*input) 23 | { 24 | let conditions = conditions 25 | .iter() 26 | .cloned() 27 | .chain(child_conditions.iter().cloned()) 28 | .collect_vec(); 29 | let new_input = *child_input; 30 | return Some(query_graph.filter(new_input, conditions)); 31 | } 32 | } 33 | None 34 | } 35 | } 36 | 37 | #[cfg(test)] 38 | mod tests { 39 | use crate::{ 40 | query_graph::QueryGraph, 41 | query_graph::{optimizer::SingleReplacementRule, QueryNode}, 42 | scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef}, 43 | }; 44 | 45 | use super::FilterMergeRule; 46 | 47 | #[test] 48 | fn test_filter_merge() { 49 | let mut query_graph = QueryGraph::new(); 50 | let table_scan_id = query_graph.table_scan(0, 10); 51 | let project_id = query_graph.project( 52 | table_scan_id, 53 | (0..10).map(|i| ScalarExpr::input_ref(i).into()).collect(), 54 | ); 55 | 56 | let filter_1: ScalarExprRef = ScalarExpr::input_ref(0) 57 | .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into()) 58 | .into(); 59 | let filter_id_1 = query_graph.filter(project_id, vec![filter_1.clone()]); 60 | let filter_2: ScalarExprRef = ScalarExpr::input_ref(2) 61 | .binary(BinaryOp::Gt, ScalarExpr::input_ref(3).into()) 62 | .into(); 63 | let filter_id_2 = query_graph.filter(filter_id_1, vec![filter_2.clone()]); 64 | query_graph.set_entry_node(filter_id_2); 65 | 66 | let filter_merge_rule = FilterMergeRule {}; 67 | assert!(filter_merge_rule 68 | .apply(&mut query_graph, project_id) 69 | .is_none()); 70 | 71 | assert!(filter_merge_rule 72 | .apply(&mut query_graph, filter_id_1) 73 | .is_none()); 74 | 75 | let merged_filter_id = filter_merge_rule 76 | .apply(&mut query_graph, filter_id_2) 77 | .unwrap(); 78 | if let QueryNode::Filter { 79 | input, conditions, .. 
80 | } = query_graph.node(merged_filter_id) 81 | { 82 | assert_eq!(*input, project_id); 83 | assert_eq!(*conditions, vec![filter_2, filter_1]); 84 | } else { 85 | panic!(); 86 | } 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_normalization.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{OptRuleType, SingleReplacementRule}, 6 | properties::{equivalence_classes, pulled_up_predicates}, 7 | NodeId, QueryGraph, QueryNode, 8 | }, 9 | scalar_expr::{ 10 | equivalence_class::to_replacement_map, rewrite::replace_sub_expressions_pre, ScalarExpr, 11 | ScalarExprRef, 12 | }, 13 | }; 14 | 15 | /// Rule that, among other things, removes filter nodes, either partially or fully, enforcing 16 | /// predicates that are already enforced by some descendent node. 17 | /// 18 | /// Expressions are normalized so that each sub-expression is replaced with the representative 19 | /// of their class, if any. For example, if we know that `'hello'` and `ref_1` belong to the 20 | /// same equivalence class, then we can replace any appearance of `ref_1` with `'hello'` literal 21 | /// as literals come before input references. 22 | /// 23 | /// Finally, it removes TRUE conditions from filter nodes. 24 | pub struct FilterNormalizationRule {} 25 | 26 | impl SingleReplacementRule for FilterNormalizationRule { 27 | fn rule_type(&self) -> OptRuleType { 28 | OptRuleType::Always 29 | } 30 | 31 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 32 | if let QueryNode::Filter { conditions, input } = query_graph.node(node_id) { 33 | let classes = equivalence_classes(query_graph, *input); 34 | let predicates = pulled_up_predicates(query_graph, *input); 35 | let mut replacement_map = to_replacement_map(&classes); 36 | let true_literal: ScalarExprRef = ScalarExpr::true_literal().into(); 37 | // Anything that is already enforced by a descendent node, can be assumed 38 | // to be true. 39 | replacement_map.extend( 40 | predicates 41 | .iter() 42 | .map(|predicate| (predicate.clone(), true_literal.clone())), 43 | ); 44 | // [A = 1, B = 1 OR A = 1] results in [A = 1, B = 1 OR TRUE] which will 45 | // be later reduced to just [A = 1]. 46 | let mut new_conditions = conditions.clone(); 47 | for i in 0..new_conditions.len() { 48 | let mut replacement_map = replacement_map.clone(); 49 | replacement_map.extend( 50 | new_conditions 51 | .iter() 52 | .enumerate() 53 | .filter(|(j, _)| i != *j) 54 | .map(|(_, e)| (e.clone(), true_literal.clone())), 55 | ); 56 | new_conditions[i] = 57 | replace_sub_expressions_pre(&new_conditions[i], &replacement_map); 58 | } 59 | // TODO(asenac) reduce expressions after applying the replacements. All of the above 60 | // could be part of the reduction of AND expressions. 
61 | let new_conditions = new_conditions 62 | .into_iter() 63 | .filter(|e| *e != true_literal) 64 | .sorted() 65 | .dedup() 66 | .collect_vec(); 67 | 68 | if new_conditions != *conditions { 69 | return Some(query_graph.filter(*input, new_conditions)); 70 | } 71 | } 72 | None 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_apply_transpose.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | query_graph::{ 3 | optimizer::{utils::common_parent_filters, OptRuleType, SingleReplacementRule}, 4 | properties::{num_columns, pulled_up_predicates}, 5 | ApplyType, NodeId, QueryGraph, QueryNode, 6 | }, 7 | scalar_expr::{rewrite::shift_left_input_refs, visitor::collect_input_dependencies}, 8 | }; 9 | 10 | /// Rule that pushes filters through apply. 11 | /// 12 | /// Collects the common filter among all the parents of the apply and, pushes down those 13 | /// only referring to one apply input. 14 | pub struct FilterApplyTransposeRule {} 15 | 16 | impl SingleReplacementRule for FilterApplyTransposeRule { 17 | fn rule_type(&self) -> OptRuleType { 18 | OptRuleType::TopDown 19 | } 20 | 21 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 22 | if let QueryNode::Apply { 23 | apply_type, 24 | left, 25 | right, 26 | correlation, 27 | } = query_graph.node(node_id) 28 | { 29 | if let Some(common_conditions) = common_parent_filters(query_graph, node_id) { 30 | let left_num_columns = num_columns(query_graph, *left); 31 | let mut left_predicates = Vec::new(); 32 | let mut right_predicates = Vec::new(); 33 | 34 | let known_predicates = pulled_up_predicates(query_graph, node_id); 35 | let allowed_right_pushdown = match apply_type { 36 | ApplyType::Inner => true, 37 | ApplyType::LeftOuter => false, 38 | }; 39 | 40 | for condition in common_conditions.iter() { 41 | if known_predicates.contains(condition) { 42 | // Skip those already known to be enforced either 43 | // by any descendent node. 44 | continue; 45 | } 46 | let dependencies = collect_input_dependencies(condition); 47 | if !dependencies.is_empty() { 48 | if dependencies.iter().all(|x| *x < left_num_columns) { 49 | left_predicates.push(condition.clone()); 50 | } else if allowed_right_pushdown 51 | && dependencies.iter().all(|x| *x >= left_num_columns) 52 | { 53 | right_predicates 54 | .push(shift_left_input_refs(condition, left_num_columns)); 55 | } 56 | } 57 | } 58 | 59 | if !left_predicates.is_empty() || !right_predicates.is_empty() { 60 | let correlation = correlation.clone(); 61 | let left = *left; 62 | let right = *right; 63 | let apply_type = *apply_type; 64 | let left = query_graph.filter(left, left_predicates); 65 | let right = query_graph.filter(right, right_predicates); 66 | 67 | return Some(query_graph.add_node(QueryNode::Apply { 68 | correlation, 69 | left, 70 | right, 71 | apply_type, 72 | })); 73 | } 74 | } 75 | } 76 | None 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/query_graph/properties/mod.rs: -------------------------------------------------------------------------------- 1 | //! This module contains the different properties that can be computed from the query graph. 2 | //! 3 | //! Most of these properties are computed bottom-up and contain a lot of boilerplate code that 4 | //! could make use of some generics. 5 | //! 
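//!
//! A sketch of that boilerplate, following the pattern used by
//! `equivalence_classes.rs` (`MyProperty` and `compute_my_property` are
//! placeholders for illustration only):
//!
//! ```ignore
//! pub fn my_property(query_graph: &QueryGraph, node_id: NodeId) -> Rc<MyProperty> {
//!     let type_id = TypeId::of::<Rc<MyProperty>>();
//!     if let Some(cached) = query_graph
//!         .property_cache
//!         .borrow_mut()
//!         .node_bottom_up_properties(node_id)
//!         .get(&type_id)
//!     {
//!         return cached.downcast_ref::<Rc<MyProperty>>().unwrap().clone();
//!     }
//!     // Release the borrow before visiting the inputs, then compute and cache.
//!     let property = Rc::new(compute_my_property(query_graph, node_id));
//!     query_graph
//!         .property_cache
//!         .borrow_mut()
//!         .node_bottom_up_properties(node_id)
//!         .insert(type_id, Box::new(property.clone()));
//!     property
//! }
//! ```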
6 | use std::{ 7 | any::{Any, TypeId}, 8 | collections::HashMap, 9 | }; 10 | 11 | use crate::query_graph::NodeId; 12 | 13 | mod column_provenance; 14 | mod correlated_input_refs; 15 | mod equivalence_classes; 16 | mod input_dependencies; 17 | mod keys; 18 | mod num_columns; 19 | mod pulled_up_predicates; 20 | mod row_type; 21 | mod subqueries; 22 | 23 | pub use column_provenance::column_provenance; 24 | pub use column_provenance::ColumnProvenanceInfo; 25 | pub use correlated_input_refs::node_correlated_input_refs; 26 | pub use correlated_input_refs::subgraph_correlated_input_refs; 27 | pub use correlated_input_refs::subgraph_correlated_input_refs_annotator; 28 | pub use equivalence_classes::equivalence_classes; 29 | pub use input_dependencies::input_dependencies; 30 | pub use keys::empty_key; 31 | pub use keys::is_empty_relation; 32 | pub use keys::keys; 33 | pub use keys::keys_annotator; 34 | pub use keys::unique_key; 35 | pub use num_columns::num_columns; 36 | pub use num_columns::num_columns_annotator; 37 | pub use pulled_up_predicates::pulled_up_predicates; 38 | pub use pulled_up_predicates::pulled_up_predicates_annotator; 39 | pub use row_type::cross_product_row_type; 40 | pub use row_type::row_type; 41 | pub use row_type::row_type_annotator; 42 | pub use subqueries::subgraph_subqueries; 43 | pub use subqueries::subqueries; 44 | 45 | use super::QueryGraph; 46 | 47 | /// Annotators used for explaining query plans. 48 | pub fn default_annotators() -> Vec<&'static dyn Fn(&QueryGraph, NodeId) -> Option> { 49 | vec![ 50 | &num_columns_annotator, 51 | &row_type_annotator, 52 | &pulled_up_predicates_annotator, 53 | &keys_annotator, 54 | &subgraph_correlated_input_refs_annotator, 55 | ] 56 | } 57 | 58 | /// Cache for compute properties 59 | pub struct PropertyCache { 60 | /// Properties computed in a bottom-up manner. 61 | bottom_up_properties: HashMap>>, 62 | /// Properties computed only from the node itself 63 | single_node_properties: HashMap>>, 64 | } 65 | 66 | impl PropertyCache { 67 | pub fn new() -> Self { 68 | Self { 69 | bottom_up_properties: HashMap::new(), 70 | single_node_properties: HashMap::new(), 71 | } 72 | } 73 | 74 | pub fn node_bottom_up_properties( 75 | &mut self, 76 | node_id: NodeId, 77 | ) -> &mut HashMap> { 78 | self.bottom_up_properties 79 | .entry(node_id) 80 | .or_insert_with(|| HashMap::new()) 81 | } 82 | 83 | /// Properties computed using only information contained in the node. 
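    /// For example, `input_dependencies` is cached here rather than in the
    /// bottom-up cache.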
84 | pub fn single_node_properties( 85 | &mut self, 86 | node_id: NodeId, 87 | ) -> &mut HashMap> { 88 | self.single_node_properties 89 | .entry(node_id) 90 | .or_insert_with(|| HashMap::new()) 91 | } 92 | 93 | pub fn invalidate_bottom_up_properties(&mut self, node_id: NodeId) { 94 | self.bottom_up_properties.remove(&node_id); 95 | } 96 | 97 | pub fn invalidate_single_node_properties(&mut self, node_id: NodeId) { 98 | self.single_node_properties.remove(&node_id); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/remove_passthrough_project.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | query_graph::{ 3 | optimizer::{OptRuleType, SingleReplacementRule}, 4 | properties::num_columns, 5 | NodeId, QueryGraph, QueryNode, 6 | }, 7 | scalar_expr::ScalarExpr, 8 | }; 9 | 10 | pub struct RemovePassthroughProjectRule {} 11 | 12 | impl SingleReplacementRule for RemovePassthroughProjectRule { 13 | fn rule_type(&self) -> OptRuleType { 14 | OptRuleType::Always 15 | } 16 | 17 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 18 | if query_graph 19 | .get_parents(node_id) 20 | .map(|parents| parents.contains(&QueryGraph::ROOT_NODE_ID)) 21 | .unwrap_or(false) 22 | { 23 | return None; 24 | } 25 | if let QueryNode::Project { outputs, input } = query_graph.node(node_id) { 26 | if query_graph.num_parents(node_id) > 0 27 | && outputs.len() == num_columns(query_graph, *input) 28 | && outputs 29 | .iter() 30 | .enumerate() 31 | .all(|(id, expr)| match expr.as_ref() { 32 | ScalarExpr::InputRef { index } => *index == id, 33 | _ => false, 34 | }) 35 | { 36 | return Some(*input); 37 | } 38 | } 39 | None 40 | } 41 | } 42 | 43 | #[cfg(test)] 44 | mod tests { 45 | use crate::{ 46 | query_graph::optimizer::SingleReplacementRule, 47 | query_graph::QueryGraph, 48 | scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef}, 49 | }; 50 | 51 | use super::RemovePassthroughProjectRule; 52 | 53 | #[test] 54 | fn test_remove_passthrough_project() { 55 | let mut query_graph = QueryGraph::new(); 56 | let table_scan_id = query_graph.table_scan(0, 10); 57 | let filter_1: ScalarExprRef = ScalarExpr::input_ref(0) 58 | .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into()) 59 | .into(); 60 | let filter_id = query_graph.filter(table_scan_id, vec![filter_1.clone()]); 61 | let project_id_1 = query_graph.project( 62 | filter_id, 63 | (0..10).map(|i| ScalarExpr::input_ref(i).into()).collect(), 64 | ); 65 | let project_id_2 = query_graph.project( 66 | filter_id, 67 | (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(), 68 | ); 69 | let project_id_3 = query_graph.project( 70 | project_id_1, 71 | (0..10).map(|i| ScalarExpr::input_ref(i).into()).collect(), 72 | ); 73 | let project_id_4 = query_graph.project( 74 | project_id_2, 75 | (0..10).map(|i| ScalarExpr::input_ref(i).into()).collect(), 76 | ); 77 | 78 | let remove_passthrough_project = RemovePassthroughProjectRule {}; 79 | assert!(remove_passthrough_project 80 | .apply(&mut query_graph, filter_id) 81 | .is_none()); 82 | assert_eq!( 83 | remove_passthrough_project 84 | .apply(&mut query_graph, project_id_1) 85 | .unwrap(), 86 | filter_id 87 | ); 88 | assert!(remove_passthrough_project 89 | .apply(&mut query_graph, project_id_2) 90 | .is_none()); 91 | 92 | assert!(remove_passthrough_project 93 | .apply(&mut query_graph, project_id_3) 94 | .is_none()); 95 | assert!(remove_passthrough_project 96 | .apply(&mut query_graph, 
project_id_4) 97 | .is_none()); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/filter_join_transpose.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | query_graph::{ 3 | optimizer::{utils::common_parent_filters, OptRuleType, SingleReplacementRule}, 4 | properties::{num_columns, pulled_up_predicates}, 5 | JoinType, NodeId, QueryGraph, QueryNode, 6 | }, 7 | scalar_expr::{rewrite::shift_left_input_refs, visitor::collect_input_dependencies}, 8 | }; 9 | 10 | /// Rule that pushes filters through join. 11 | /// 12 | /// Collects the common filter among all the parents of the join and, pushes down those 13 | /// only referring to one join input. 14 | pub struct FilterJoinTransposeRule {} 15 | 16 | impl SingleReplacementRule for FilterJoinTransposeRule { 17 | fn rule_type(&self) -> OptRuleType { 18 | OptRuleType::TopDown 19 | } 20 | 21 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 22 | if let QueryNode::Join { 23 | join_type, 24 | left, 25 | right, 26 | conditions, 27 | } = query_graph.node(node_id) 28 | { 29 | if let Some(common_conditions) = common_parent_filters(query_graph, node_id) { 30 | let left_num_columns = num_columns(query_graph, *left); 31 | let mut left_predicates = Vec::new(); 32 | let mut right_predicates = Vec::new(); 33 | 34 | let known_predicates = pulled_up_predicates(query_graph, node_id); 35 | 36 | let allowed_left_pushdown = match join_type { 37 | JoinType::Semi | JoinType::Anti | JoinType::Inner | JoinType::LeftOuter => true, 38 | JoinType::RightOuter | JoinType::FullOuter => false, 39 | }; 40 | let allowed_right_pushdown = match join_type { 41 | JoinType::Inner | JoinType::RightOuter => true, 42 | JoinType::Semi | JoinType::Anti | JoinType::LeftOuter | JoinType::FullOuter => { 43 | false 44 | } 45 | }; 46 | 47 | for condition in common_conditions.iter() { 48 | if known_predicates.contains(condition) { 49 | // Skip those already known to be enforced either 50 | // by the join or any descendent node. 
51 |                         continue;
52 |                     }
53 |                     let dependencies = collect_input_dependencies(condition);
54 |                     if !dependencies.is_empty() {
55 |                         if allowed_left_pushdown
56 |                             && dependencies.iter().all(|x| *x < left_num_columns)
57 |                         {
58 |                             left_predicates.push(condition.clone());
59 |                         } else if allowed_right_pushdown
60 |                             && dependencies.iter().all(|x| *x >= left_num_columns)
61 |                         {
62 |                             right_predicates
63 |                                 .push(shift_left_input_refs(condition, left_num_columns));
64 |                         }
65 |                     }
66 |                 }
67 | 
68 |                 if !left_predicates.is_empty() || !right_predicates.is_empty() {
69 |                     let conditions = conditions.clone();
70 |                     let left = *left;
71 |                     let right = *right;
72 |                     let join_type = *join_type;
73 |                     let left = query_graph.filter(left, left_predicates);
74 |                     let right = query_graph.filter(right, right_predicates);
75 | 
76 |                     return Some(query_graph.join(join_type, left, right, conditions));
77 |                 }
78 |             }
79 |         }
80 |         None
81 |     }
82 | }
83 | 
--------------------------------------------------------------------------------
/src/query_graph/optimizer/rules/apply_pruning.rs:
--------------------------------------------------------------------------------
 1 | use itertools::Itertools;
 2 | 
 3 | use crate::{
 4 |     query_graph::{
 5 |         optimizer::{
 6 |             utils::{
 7 |                 apply_map_to_parents_and_replace_input, required_columns_from_parents,
 8 |                 required_columns_to_column_map,
 9 |             },
10 |             OptRuleType, Rule,
11 |         },
12 |         properties::num_columns,
13 |         NodeId, QueryGraph, QueryNode,
14 |     },
15 |     scalar_expr::{visitor::store_input_dependencies, ScalarExpr},
16 | };
17 | 
18 | /// Rule that given a shared apply where all its parents are pruning projections, computes
19 | /// the superset of columns required by all its parents, and prunes the columns not used
20 | /// by any of them, replacing the parents of the apply with projections over the pruned
21 | /// apply.
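///
/// A hypothetical before/after sketch (plan notation only, not real syntax):
///
/// ```ignore
/// // Before: the parent projections and the correlated parameters only use
/// // columns {0, 5} of an apply whose left input has 4 columns.
/// //   apply = Apply { left, right, correlation, apply_type }
/// //
/// // After applying ApplyPruningRule:
/// //   apply' = Apply {
/// //       left: project(left, [ref_0]),
/// //       right: project(right, [ref_1]),   // old column 5
/// //       correlation,
/// //       apply_type,
/// //   }
/// //   ...and every parent projection is remapped from columns {0, 5} to {0, 1}.
/// ```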
22 | pub struct ApplyPruningRule {} 23 | 24 | impl Rule for ApplyPruningRule { 25 | fn rule_type(&self) -> OptRuleType { 26 | OptRuleType::TopDown 27 | } 28 | 29 | fn apply( 30 | &self, 31 | query_graph: &mut QueryGraph, 32 | node_id: NodeId, 33 | ) -> Option> { 34 | if let QueryNode::Apply { 35 | apply_type, 36 | left, 37 | right, 38 | correlation, 39 | } = query_graph.node(node_id) 40 | { 41 | if let Some(mut required_columns) = required_columns_from_parents(query_graph, node_id) 42 | { 43 | // Add the columns from the LHS referenced by the RHS 44 | for parameter in correlation.parameters.iter() { 45 | store_input_dependencies(parameter, &mut required_columns); 46 | } 47 | if required_columns.len() == num_columns(&query_graph, node_id) { 48 | // All columns are referenced, nothing to prune 49 | return None; 50 | } 51 | let column_map = required_columns_to_column_map(&required_columns); 52 | let left_num_columns = num_columns(query_graph, *left); 53 | let (left_columns, right_columns): (Vec, Vec) = required_columns 54 | .iter() 55 | .sorted() 56 | .partition(|col| **col < left_num_columns); 57 | let left_outputs = left_columns 58 | .iter() 59 | .map(|i| ScalarExpr::InputRef { index: *i }.into()) 60 | .collect::>(); 61 | let right_outputs = right_columns 62 | .iter() 63 | .map(|i| { 64 | ScalarExpr::InputRef { 65 | index: *i - left_num_columns, 66 | } 67 | .into() 68 | }) 69 | .collect::>(); 70 | let correlation = correlation.clone(); 71 | let apply_type = *apply_type; 72 | let left = *left; 73 | let right = *right; 74 | let new_left = query_graph.project(left, left_outputs); 75 | let new_right = query_graph.project(right, right_outputs); 76 | let new_apply = query_graph.add_node(QueryNode::Apply { 77 | correlation, 78 | left: new_left, 79 | right: new_right, 80 | apply_type, 81 | }); 82 | 83 | // Rewrite the parent projections 84 | return Some(apply_map_to_parents_and_replace_input( 85 | query_graph, 86 | node_id, 87 | &column_map, 88 | new_apply, 89 | )); 90 | } 91 | } 92 | None 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/aggregate_project_transpose.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeSet, HashMap}; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{utils::sort_projection, OptRuleType, SingleReplacementRule}, 8 | NodeId, QueryGraph, QueryNode, 9 | }, 10 | scalar_expr::{AggregateExpr, ScalarExpr}, 11 | }; 12 | 13 | /// Given an aggregate node over a non-sorted projection, it creates a new aggregate node 14 | /// over a sorted version of the projection, and adds a re-ordering projection on top of 15 | /// the new aggregate node. 
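///
/// A hypothetical before/after sketch (plan notation only, not real syntax):
///
/// ```ignore
/// // Before: the aggregate sits on top of a non-sorted projection.
/// //   Aggregate { group_key: [0], aggregates: [] } over project(input, [ref_2, ref_1])
/// //
/// // After applying AggregateProjectTransposeRule: the projection is sorted,
/// // the group key is remapped, and a projection on top restores the
/// // original column order.
/// //   project(
/// //       Aggregate { group_key: [1], aggregates: [] } over project(input, [ref_1, ref_2]),
/// //       [ref_0],
/// //   )
/// ```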
16 | pub struct AggregateProjectTransposeRule {} 17 | 18 | impl SingleReplacementRule for AggregateProjectTransposeRule { 19 | fn rule_type(&self) -> OptRuleType { 20 | OptRuleType::BottomUp 21 | } 22 | 23 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 24 | if let QueryNode::Aggregate { 25 | group_key, 26 | aggregates, 27 | input, 28 | } = query_graph.node(node_id) 29 | { 30 | if let QueryNode::Project { 31 | outputs, 32 | input: proj_input, 33 | } = query_graph.node(*input) 34 | { 35 | if let Some((reorder_map, sorted_proj)) = sort_projection(outputs) { 36 | let column_map = reorder_map 37 | .iter() 38 | .enumerate() 39 | .map(|(i, e)| (*e, i)) 40 | .collect::>(); 41 | let new_group_key = group_key 42 | .iter() 43 | .map(|k| *column_map.get(k).unwrap()) 44 | .collect::>(); 45 | let new_aggregates = aggregates 46 | .iter() 47 | .map(|k| { 48 | AggregateExpr { 49 | op: k.op.clone(), 50 | operands: k 51 | .operands 52 | .iter() 53 | .map(|e| *column_map.get(e).unwrap()) 54 | .collect_vec(), 55 | } 56 | .into() 57 | }) 58 | .collect_vec(); 59 | 60 | // Reorder the grouping key elements in a projection over the new 61 | // aggregate node 62 | let group_key_len = group_key.len(); 63 | let aggregates_len = aggregates.len(); 64 | let reordering_proj = group_key 65 | .iter() 66 | .enumerate() 67 | .map(|(i, k)| (i, *column_map.get(k).unwrap())) 68 | .sorted_by_key(|(_, e)| *e) 69 | .enumerate() 70 | .sorted_by_key(|(_, (i, _))| *i) 71 | .map(|(i, _)| i) 72 | // ... and the aggregates 73 | .chain(group_key_len..group_key_len + aggregates_len) 74 | .map(|i| ScalarExpr::input_ref(i).into()) 75 | .collect_vec(); 76 | 77 | let new_project = query_graph.project(*proj_input, sorted_proj); 78 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 79 | group_key: new_group_key, 80 | aggregates: new_aggregates, 81 | input: new_project, 82 | }); 83 | return Some(query_graph.project(new_aggregate, reordering_proj)); 84 | } 85 | } 86 | } 87 | None 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /tools/cytoscape.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 27 | 28 | 29 | 30 | 31 |
32 | 33 | 34 | 35 | 106 | 107 | 108 | -------------------------------------------------------------------------------- /src/value.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::data_type::DataType; 6 | 7 | #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash, Debug)] 8 | pub enum Value { 9 | Bool(bool), 10 | Int(i32), 11 | BigInt(i64), 12 | String(String), 13 | List(Vec>), 14 | Any(Box), 15 | Null, 16 | } 17 | 18 | #[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord, Debug)] 19 | pub struct Literal { 20 | pub value: Value, 21 | pub data_type: DataType, 22 | } 23 | 24 | impl fmt::Display for Literal { 25 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 26 | Literal::fmt(f, &self.value, &self.data_type) 27 | } 28 | } 29 | 30 | impl Literal { 31 | pub fn new(value: Value, data_type: DataType) -> Self { 32 | Self { value, data_type } 33 | } 34 | 35 | pub fn build_default(data_type: DataType) -> Self { 36 | Self { 37 | value: default_value_for_data_type(&data_type), 38 | data_type, 39 | } 40 | } 41 | 42 | pub fn build_null(data_type: DataType) -> Self { 43 | Self { 44 | value: Value::Null, 45 | data_type, 46 | } 47 | } 48 | 49 | fn fmt(f: &mut fmt::Formatter, value: &Value, data_type: &DataType) -> fmt::Result { 50 | match (value, data_type) { 51 | (Value::Bool(value), DataType::Bool) => { 52 | if *value { 53 | write!(f, "TRUE") 54 | } else { 55 | write!(f, "FALSE") 56 | } 57 | } 58 | (Value::Int(value), DataType::Int) => write!(f, "{}", value), 59 | (Value::BigInt(value), DataType::BigInt) => write!(f, "{}", value), 60 | // TODO(asenac) escape strings 61 | (Value::String(value), DataType::String) => write!(f, "'{}'", value), 62 | (Value::List(vec), DataType::Array(elem_type)) => { 63 | write!(f, "[")?; 64 | for (i, e) in vec.iter().enumerate() { 65 | if i > 0 { 66 | write!(f, ", ")?; 67 | } 68 | Self::fmt(f, &e, &elem_type)?; 69 | } 70 | write!(f, "]") 71 | } 72 | (Value::List(vec), DataType::Tuple(data_types)) => { 73 | write!(f, "(")?; 74 | for (i, (e, data_type)) in vec.iter().zip(data_types.iter()).enumerate() { 75 | if i > 0 { 76 | write!(f, ", ")?; 77 | } 78 | Self::fmt(f, e, data_type)?; 79 | } 80 | write!(f, ")") 81 | } 82 | (Value::Null, _) => write!(f, "NULL"), 83 | (Value::Any(literal), DataType::Any) => write!(f, "{}", literal), 84 | (_, _) => panic!("unsupported value - data type pair"), 85 | } 86 | } 87 | 88 | pub fn is_null(&self) -> bool { 89 | if let Value::Null = self.value { 90 | true 91 | } else { 92 | false 93 | } 94 | } 95 | } 96 | 97 | pub fn default_value_for_data_type(data_type: &DataType) -> Value { 98 | match data_type { 99 | DataType::Bool => Value::Bool(false), 100 | DataType::Int => Value::Int(0), 101 | DataType::BigInt => Value::BigInt(0), 102 | DataType::String => Value::String("".to_string()), 103 | DataType::Array(_) => Value::List(Vec::new()), 104 | DataType::Tuple(members) => Value::List( 105 | members 106 | .iter() 107 | .map(|nested_type| Box::new(default_value_for_data_type(nested_type))) 108 | .collect_vec(), 109 | ), 110 | DataType::Any => Value::Null, 111 | DataType::Unknown => panic!("cannot create value of unknown type"), 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /tools/d3.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 116 | 117 | 118 | 
-------------------------------------------------------------------------------- /src/query_graph/properties/subqueries.rs: -------------------------------------------------------------------------------- 1 | use std::{any::TypeId, collections::BTreeSet, rc::Rc}; 2 | 3 | use crate::{ 4 | query_graph::{visitor::QueryGraphPrePostVisitor, NodeId, QueryGraph}, 5 | visitor_utils::PreOrderVisitationResult, 6 | }; 7 | 8 | struct SubqueryPropertyTag; 9 | 10 | /// Returns a set with the subqueries the node contains, if any. 11 | pub fn subqueries(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 12 | let type_id = TypeId::of::(); 13 | if let Some(cached) = query_graph 14 | .property_cache 15 | .borrow_mut() 16 | .single_node_properties(node_id) 17 | .get(&type_id) 18 | { 19 | return cached 20 | .downcast_ref::>>() 21 | .unwrap() 22 | .clone(); 23 | } 24 | let subqueries = Rc::new(query_graph.node(node_id).collect_subqueries()); 25 | query_graph 26 | .property_cache 27 | .borrow_mut() 28 | .single_node_properties(node_id) 29 | .insert(type_id, Box::new(subqueries.clone())); 30 | subqueries 31 | } 32 | 33 | /// Retrieve the subqueries within the given subgraph, but not the nested subqueries. 34 | pub fn subgraph_subqueries(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 35 | SubgraphSubqueries::subgraph_subqueries(query_graph, node_id) 36 | } 37 | 38 | struct SubgraphSubqueries {} 39 | 40 | impl SubgraphSubqueries { 41 | fn subgraph_subqueries(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 42 | let mut visitor = SubgraphSubqueries {}; 43 | query_graph.visit_subgraph(&mut visitor, node_id); 44 | visitor.subgraph_subqueries_unchecked(query_graph, node_id) 45 | } 46 | 47 | fn subgraph_subqueries_unchecked( 48 | &self, 49 | query_graph: &QueryGraph, 50 | node_id: NodeId, 51 | ) -> Rc> { 52 | query_graph 53 | .property_cache 54 | .borrow_mut() 55 | .node_bottom_up_properties(node_id) 56 | .get(&Self::metadata_type_id()) 57 | .unwrap() 58 | .downcast_ref::>>() 59 | .unwrap() 60 | .clone() 61 | } 62 | 63 | fn metadata_type_id() -> TypeId { 64 | TypeId::of::() 65 | } 66 | 67 | fn compute_property_for_node( 68 | &self, 69 | query_graph: &QueryGraph, 70 | node_id: NodeId, 71 | ) -> Rc> { 72 | let mut subqueries: BTreeSet = subqueries(query_graph, node_id).as_ref().clone(); 73 | let query_node = query_graph.node(node_id); 74 | for input in 0..query_node.num_inputs() { 75 | let input_subqueries = 76 | self.subgraph_subqueries_unchecked(query_graph, query_node.get_input(input)); 77 | subqueries.extend(input_subqueries.iter()); 78 | } 79 | Rc::new(subqueries) 80 | } 81 | } 82 | 83 | impl QueryGraphPrePostVisitor for SubgraphSubqueries { 84 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 85 | if query_graph 86 | .property_cache 87 | .borrow_mut() 88 | .node_bottom_up_properties(node_id) 89 | .contains_key(&Self::metadata_type_id()) 90 | { 91 | PreOrderVisitationResult::DoNotVisitInputs 92 | } else { 93 | PreOrderVisitationResult::VisitInputs 94 | } 95 | } 96 | 97 | fn visit_post(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 98 | if !query_graph 99 | .property_cache 100 | .borrow_mut() 101 | .node_bottom_up_properties(node_id) 102 | .contains_key(&Self::metadata_type_id()) 103 | { 104 | let correlated_input_refs = self.compute_property_for_node(query_graph, node_id); 105 | query_graph 106 | .property_cache 107 | .borrow_mut() 108 | .node_bottom_up_properties(node_id) 109 | .insert(Self::metadata_type_id(), Box::new(correlated_input_refs)); 110 | 
} 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/scalar_expr/reduction.rs: -------------------------------------------------------------------------------- 1 | //! Utilities for reducing scalar expression 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | data_type::DataType, 7 | query_graph::{properties::num_columns, QueryGraph}, 8 | }; 9 | 10 | use super::{rewrite::rewrite_expr_pre_post, NaryOp, ScalarExpr, ScalarExprRef, Subquery}; 11 | 12 | /// Reduce the given expression recursively. Keeps trying until the expression cannot 13 | /// be reduced any further. 14 | pub fn reduce_expr_recursively( 15 | expr: &ScalarExprRef, 16 | query_graph: &QueryGraph, 17 | row_type: &[DataType], 18 | ) -> ScalarExprRef { 19 | rewrite_expr_pre_post( 20 | &mut |curr_expr: &ScalarExprRef| reduce_expr(curr_expr, query_graph, row_type), 21 | &expr, 22 | ) 23 | } 24 | 25 | pub fn reduce_expr( 26 | expr: &ScalarExprRef, 27 | query_graph: &QueryGraph, 28 | row_type: &[DataType], 29 | ) -> Option { 30 | if let ScalarExpr::NaryOp { 31 | op: NaryOp::And, 32 | operands, 33 | } = expr.as_ref() 34 | { 35 | if operands.iter().any(|o| **o == ScalarExpr::false_literal()) { 36 | return Some(ScalarExpr::false_literal().into()); 37 | } 38 | if operands.iter().any(|o| **o == ScalarExpr::true_literal()) { 39 | let new_operands = operands 40 | .iter() 41 | .filter(|o| *o.as_ref() == ScalarExpr::true_literal()) 42 | .dedup() 43 | .cloned() 44 | .collect_vec(); 45 | return Some(match new_operands.len() { 46 | 0 => ScalarExpr::true_literal().into(), 47 | 1 => new_operands[0].clone(), 48 | _ => ScalarExpr::nary(NaryOp::And, new_operands).into(), 49 | }); 50 | } 51 | } 52 | if let ScalarExpr::NaryOp { 53 | op: NaryOp::Or, 54 | operands, 55 | } = expr.as_ref() 56 | { 57 | if operands.iter().any(|o| **o == ScalarExpr::true_literal()) { 58 | return Some(ScalarExpr::true_literal().into()); 59 | } 60 | if operands.iter().any(|o| **o == ScalarExpr::false_literal()) { 61 | let new_operands = operands 62 | .iter() 63 | .filter(|o| *o.as_ref() == ScalarExpr::false_literal()) 64 | .dedup() 65 | .cloned() 66 | .collect_vec(); 67 | return Some(match new_operands.len() { 68 | 0 => ScalarExpr::false_literal().into(), 69 | 1 => new_operands[0].clone(), 70 | _ => ScalarExpr::nary(NaryOp::And, new_operands).into(), 71 | }); 72 | } 73 | } 74 | if let ScalarExpr::BinaryOp { op, left, right } = expr.as_ref() { 75 | if op.propagates_null() && (left.is_null() || right.is_null()) { 76 | return Some(ScalarExpr::null_literal(expr.data_type(query_graph, row_type)).into()); 77 | } 78 | } 79 | None 80 | } 81 | 82 | pub fn reduce_and_prune_exists_subplans_recursively( 83 | expr: &ScalarExprRef, 84 | query_graph: &mut QueryGraph, 85 | row_type: &[DataType], 86 | ) -> ScalarExprRef { 87 | rewrite_expr_pre_post( 88 | &mut |curr_expr: &ScalarExprRef| { 89 | prune_exists_subplan(curr_expr, query_graph) 90 | .or_else(|| reduce_expr(curr_expr, query_graph, row_type)) 91 | }, 92 | &expr, 93 | ) 94 | } 95 | 96 | pub fn prune_exists_subplan( 97 | expr: &ScalarExprRef, 98 | query_graph: &mut QueryGraph, 99 | ) -> Option { 100 | if let ScalarExpr::ExistsSubquery { subquery } = expr.as_ref() { 101 | if num_columns(query_graph, subquery.root) > 0 { 102 | // Skip the root node 103 | let subquery_plan = query_graph.node(subquery.root).get_input(0); 104 | let correlation = subquery.correlation.clone(); 105 | let project = query_graph.project(subquery_plan, vec![]); 106 | let new_subquery_root = 
query_graph.add_subquery(project); 107 | return Some( 108 | ScalarExpr::ExistsSubquery { 109 | subquery: Subquery { 110 | root: new_subquery_root, 111 | correlation, 112 | }, 113 | } 114 | .into(), 115 | ); 116 | } 117 | } 118 | 119 | None 120 | } 121 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/aggregate_simplifier.rs: -------------------------------------------------------------------------------- 1 | use std::collections::BTreeSet; 2 | 3 | use crate::{ 4 | query_graph::{ 5 | optimizer::{OptRuleType, SingleReplacementRule}, 6 | properties::{equivalence_classes, num_columns}, 7 | NodeId, QueryGraph, QueryNode, 8 | }, 9 | scalar_expr::{ 10 | equivalence_class::{find_class, EquivalenceClasses}, 11 | rewrite::{rewrite_expr_post, rewrite_expr_vec}, 12 | ScalarExpr, ScalarExprRef, 13 | }, 14 | }; 15 | 16 | /// Optimization rule that removes grouping key elements from an Aggregate node that 17 | /// are either constants or that can be computed from the remaining ones. 18 | /// 19 | /// Note that the last constant element cannot be removed if it's the only grouping 20 | /// key element, as that would make the aggregate always return a row. 21 | pub struct AggregateSimplifierRule {} 22 | 23 | impl SingleReplacementRule for AggregateSimplifierRule { 24 | fn rule_type(&self) -> OptRuleType { 25 | OptRuleType::Always 26 | } 27 | 28 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 29 | if let QueryNode::Aggregate { 30 | group_key, 31 | aggregates, 32 | input, 33 | } = query_graph.node(node_id) 34 | { 35 | if group_key.len() < 2 { 36 | return None; 37 | } 38 | let classes = equivalence_classes(query_graph, node_id); 39 | if classes.is_empty() { 40 | return None; 41 | } 42 | if let Some((out_col, in_col, expr)) = find_redundant_key(group_key, &classes) { 43 | let num_columns = num_columns(query_graph, node_id); 44 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 45 | group_key: group_key 46 | .iter() 47 | .filter(|i| **i != in_col) 48 | .cloned() 49 | .collect(), 50 | aggregates: aggregates.clone(), 51 | input: *input, 52 | }); 53 | let project = (0..num_columns) 54 | .map(|i| { 55 | if i != out_col { 56 | ScalarExpr::input_ref(i).into() 57 | } else { 58 | expr.clone() 59 | } 60 | }) 61 | .collect::>(); 62 | let project = update_project_after_pruning_column(project, out_col); 63 | return Some(query_graph.project(new_aggregate, project)); 64 | } 65 | } 66 | None 67 | } 68 | } 69 | 70 | /// Finds an element of the grouping key that can be written in terms of the rest or 71 | /// it's a constant. 72 | /// 73 | /// Returns a triple with the position in which that element is projected by the Aggregate 74 | /// operator, the input column and the expression it is equivalent to. 
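For example (a hypothetical sketch, not part of aggregate_simplifier.rs): if find_redundant_key reports that output column 1 is equivalent to a constant, the rule re-emits that constant at position 1 and shifts every later reference down by one, since the new aggregate exposes one column less.

fn main() {
    // Aggregate with four output columns where column 1 is redundant (made-up example).
    let num_columns = 4usize;
    let out_col = 1usize;
    let project: Vec<String> = (0..num_columns)
        .map(|i| {
            if i == out_col {
                "'x'".to_string() // the expression the pruned key element is equivalent to
            } else if i > out_col {
                format!("ref_{}", i - 1) // shifted after pruning the column
            } else {
                format!("ref_{}", i)
            }
        })
        .collect();
    assert_eq!(project, ["ref_0", "'x'", "ref_1", "ref_2"]);
}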
75 | fn find_redundant_key( 76 | group_key: &BTreeSet, 77 | classes: &EquivalenceClasses, 78 | ) -> Option<(usize, usize, ScalarExprRef)> { 79 | group_key.iter().enumerate().find_map(|(out_col, in_col)| { 80 | let input_ref = ScalarExpr::input_ref(out_col).into(); 81 | if let Some(class_id) = find_class(&classes, &input_ref) { 82 | let class = &classes[class_id]; 83 | // TODO(asenac) verify that other doesn't reference input_ref 84 | if let Some(other) = class.members.iter().find(|x| **x != input_ref).cloned() { 85 | return Some((out_col, *in_col, other)); 86 | } 87 | } 88 | None 89 | }) 90 | } 91 | 92 | /// Rewrites the expressions in `project` so that all input refs after the pruned column 93 | /// are shifted one position. 94 | fn update_project_after_pruning_column( 95 | project: Vec, 96 | pruned_col: usize, 97 | ) -> Vec { 98 | rewrite_expr_vec(&project, &mut |expr| { 99 | rewrite_expr_post( 100 | &mut |e: &ScalarExprRef| { 101 | if let ScalarExpr::InputRef { index } = e.as_ref() { 102 | if *index > pruned_col { 103 | return Some(ScalarExpr::input_ref(index - 1).into()); 104 | } 105 | } 106 | None 107 | }, 108 | expr, 109 | ) 110 | }) 111 | } 112 | -------------------------------------------------------------------------------- /src/scalar_expr/equivalence_class.rs: -------------------------------------------------------------------------------- 1 | use crate::scalar_expr::*; 2 | use std::collections::{BTreeSet, HashMap}; 3 | 4 | /// An equivalence class is a group of expressions within a given context that 5 | /// are known to always lead to the same values. 6 | /// 7 | /// If `ref_0 = ref_1` is known to be true, then `ref_0` and `ref_1` belong to 8 | /// the same equivalence class. 9 | pub struct EquivalenceClass { 10 | /// Indicates that any of the equality predicates that lead to this class 11 | /// was using the null-rejecting equality operator, ie. the SQL equality 12 | /// operator (`BinaryOp::Eq`), and hence, none of the expression within the 13 | /// class will evaluate to NULL. 14 | pub null_rejecting: bool, 15 | /// The list of expressions belonging to the class. 16 | pub members: BTreeSet, 17 | } 18 | 19 | pub type EquivalenceClasses = Vec; 20 | 21 | impl EquivalenceClass { 22 | fn new(null_rejecting: bool, members: BTreeSet) -> Self { 23 | Self { 24 | null_rejecting, 25 | members, 26 | } 27 | } 28 | 29 | /// Merges two equivalence classes. 30 | fn merge(&mut self, mut other: Self) { 31 | self.null_rejecting = self.null_rejecting || other.null_rejecting; 32 | self.members.append(&mut other.members); 33 | } 34 | } 35 | 36 | /// Returns the index of the class within the given list of classes, if any, 37 | /// the given expression belongs to. 38 | pub fn find_class(classes: &EquivalenceClasses, expr: &ScalarExprRef) -> Option { 39 | classes.iter().enumerate().find_map(|(class_id, class)| { 40 | if class.members.contains(expr) { 41 | Some(class_id) 42 | } else { 43 | None 44 | } 45 | }) 46 | } 47 | 48 | /// Extract the equivalence classes using the equality predicates among the given 49 | /// list of predicates. 50 | /// 51 | /// The same expression cannot belong to the two different classes. If `ref_0 = ref_1` 52 | /// and `ref_1 = ref_2` are present in the given list of predicates, then `ref_0`, `ref_1` 53 | /// and `ref_2` are part of the same equivalence class. 
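The transitive merging described above can be sketched with plain strings standing in for expressions (a simplified illustration only, not the function below; it ignores the null-rejecting flag):

use std::collections::BTreeSet;

fn main() {
    // Hypothetical equality predicates: a = b, b = c, d = e.
    let equalities = [("a", "b"), ("b", "c"), ("d", "e")];
    let mut classes: Vec<BTreeSet<&str>> = Vec::new();
    for (l, r) in equalities {
        let lc = classes.iter().position(|c| c.contains(l));
        let rc = classes.iter().position(|c| c.contains(r));
        match (lc, rc) {
            (None, None) => classes.push([l, r].into_iter().collect()),
            (Some(i), None) | (None, Some(i)) => {
                classes[i].insert(l);
                classes[i].insert(r);
            }
            (Some(i), Some(j)) if i != j => {
                // Merge the two classes, keeping the one with the smaller index.
                let removed = classes.remove(i.max(j));
                classes[i.min(j)].extend(removed);
            }
            _ => {}
        }
    }
    // a, b and c end up in one class; d and e in another.
    assert_eq!(classes.len(), 2);
    assert!(classes[0].contains("c") && classes[1].contains("e"));
}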
54 | pub fn extract_equivalence_classes(predicates: &Vec) -> EquivalenceClasses { 55 | let mut classes: EquivalenceClasses = Vec::new(); 56 | for predicate in predicates.iter() { 57 | if let ScalarExpr::BinaryOp { op, left, right } = predicate.as_ref() { 58 | let null_rejecting = match op { 59 | BinaryOp::RawEq => false, 60 | BinaryOp::Eq => true, 61 | _ => continue, 62 | }; 63 | let left_class = find_class(&classes, left); 64 | let right_class = find_class(&classes, right); 65 | match (left_class, right_class) { 66 | (None, None) => { 67 | classes.push(EquivalenceClass::new( 68 | null_rejecting, 69 | BTreeSet::from([left.clone(), right.clone()]), 70 | )); 71 | } 72 | (None, Some(class_id)) | (Some(class_id), None) => { 73 | let new_class = 74 | EquivalenceClass::new(true, BTreeSet::from([left.clone(), right.clone()])); 75 | classes[class_id].merge(new_class); 76 | } 77 | (Some(class_left), Some(class_right)) => { 78 | if class_left != class_right { 79 | let min_class = std::cmp::min(class_left, class_right); 80 | let max_class = std::cmp::max(class_left, class_right); 81 | let removed_class = classes.remove(max_class); 82 | classes[min_class].merge(removed_class); 83 | } 84 | } 85 | } 86 | } 87 | } 88 | classes 89 | } 90 | 91 | /// Converts a set of classes into a replacement map in order to replace each member of a 92 | /// class with the first element of the class, ie. with the representative of the class. 93 | pub fn to_replacement_map(classes: &EquivalenceClasses) -> HashMap { 94 | classes 95 | .iter() 96 | .map(|class| { 97 | let first = class.members.first().unwrap(); 98 | class 99 | .members 100 | .iter() 101 | .skip(1) 102 | .filter(|other| !other.is_literal()) 103 | .map(|other| (other.clone(), first.clone())) 104 | }) 105 | .flatten() 106 | .collect::>() 107 | } 108 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/join_pruning.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashSet; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{ 8 | utils::{ 9 | apply_map_to_parents_and_replace_input, required_columns_from_parents, 10 | required_columns_to_column_map, 11 | }, 12 | OptRuleType, Rule, 13 | }, 14 | properties::num_columns, 15 | NodeId, QueryGraph, QueryNode, 16 | }, 17 | scalar_expr::{ 18 | rewrite::{apply_column_map, rewrite_expr_vec}, 19 | visitor::store_input_dependencies, 20 | ScalarExpr, 21 | }, 22 | }; 23 | 24 | /// Rule that given a shared join where all its parents are pruning projections, computes 25 | /// the superset of columns required by all its parents, and prunes the columns not used 26 | /// by any of them, replacing the parents of the join with projections over the pruned 27 | /// join. 
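The per-side pruning described above splits the required columns around the boundary between the two join inputs and rebases the right-hand indexes to the right input's own numbering. A tiny sketch (hypothetical column numbers, plain std only; not the rule's code):

fn main() {
    // Join whose left input exposes 3 columns; required columns are {1, 2, 4}.
    let left_num_columns = 3usize;
    let required = [1usize, 2, 4];
    let (left_cols, right_cols): (Vec<usize>, Vec<usize>) =
        required.iter().partition(|col| **col < left_num_columns);
    // Right-side refs are rebased to the right input's own numbering.
    let right_outputs: Vec<usize> = right_cols.iter().map(|c| c - left_num_columns).collect();
    assert_eq!(left_cols, vec![1, 2]);
    assert_eq!(right_outputs, vec![1]);
}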
28 | pub struct JoinPruningRule {} 29 | 30 | impl Rule for JoinPruningRule { 31 | fn rule_type(&self) -> OptRuleType { 32 | OptRuleType::TopDown 33 | } 34 | 35 | fn apply( 36 | &self, 37 | query_graph: &mut QueryGraph, 38 | node_id: NodeId, 39 | ) -> Option> { 40 | if let QueryNode::Join { 41 | join_type, 42 | left, 43 | right, 44 | conditions, 45 | } = query_graph.node(node_id) 46 | { 47 | if let Some(required_columns) = required_columns_from_parents(query_graph, node_id) { 48 | // Rewrite conditions 49 | let column_map = required_columns_to_column_map(&required_columns); 50 | let mut required_columns_including_join = column_map 51 | .iter() 52 | .map(|(col, _)| *col) 53 | .collect::>(); 54 | for condition in conditions.iter() { 55 | store_input_dependencies(condition, &mut required_columns_including_join); 56 | } 57 | if required_columns_including_join.len() == num_columns(&query_graph, node_id) { 58 | return None; 59 | } 60 | let join_column_map = 61 | required_columns_to_column_map(&required_columns_including_join); 62 | let new_conditions = rewrite_expr_vec(conditions, &mut |e| { 63 | apply_column_map(e, &join_column_map).unwrap() 64 | }); 65 | 66 | // Prune the branches 67 | let left_num_columns = num_columns(query_graph, *left); 68 | let (left_columns, right_columns): (Vec, Vec) = 69 | required_columns_including_join 70 | .iter() 71 | .sorted() 72 | .partition(|col| **col < left_num_columns); 73 | let left_outputs = left_columns 74 | .iter() 75 | .map(|i| ScalarExpr::InputRef { index: *i }.into()) 76 | .collect::>(); 77 | let right_outputs = right_columns 78 | .iter() 79 | .map(|i| { 80 | ScalarExpr::InputRef { 81 | index: *i - left_num_columns, 82 | } 83 | .into() 84 | }) 85 | .collect::>(); 86 | let left = *left; 87 | let right = *right; 88 | let join_type = *join_type; 89 | let new_left = query_graph.project(left, left_outputs); 90 | let new_right = query_graph.project(right, right_outputs); 91 | let new_join = query_graph.join(join_type, new_left, new_right, new_conditions); 92 | 93 | // Prune the columns used by the join conditions but not by the parents 94 | let pruning_proj_outputs = required_columns_including_join 95 | .iter() 96 | .sorted() 97 | .enumerate() 98 | .filter(|(_, orig_col)| required_columns.contains(&orig_col)) 99 | .map(|(i, _)| ScalarExpr::input_ref(i).into()) 100 | .collect(); 101 | let pruning_proj = query_graph.project(new_join, pruning_proj_outputs); 102 | 103 | // Rewrite the parent projections 104 | return Some(apply_map_to_parents_and_replace_input( 105 | query_graph, 106 | node_id, 107 | &column_map, 108 | pruning_proj, 109 | )); 110 | } 111 | } 112 | None 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/join_project_transpose.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{utils::sort_projection, OptRuleType, SingleReplacementRule}, 8 | properties::num_columns, 9 | NodeId, QueryGraph, QueryNode, 10 | }, 11 | scalar_expr::{rewrite::apply_column_map, ScalarExpr}, 12 | }; 13 | 14 | /// Given a non-sorted projection at the input of a join, it creates a new join with 15 | /// a sorted projection and adds a reordering projection on top of the new join to 16 | /// leave the columns in the same order as before. 
17 | /// 18 | /// This is a normalization rule for lifting column reordering towards the root of 19 | /// the query graph. 20 | pub struct JoinProjectTransposeRule {} 21 | 22 | impl SingleReplacementRule for JoinProjectTransposeRule { 23 | fn rule_type(&self) -> OptRuleType { 24 | OptRuleType::BottomUp 25 | } 26 | 27 | fn apply(&self, query_graph: &mut QueryGraph, node_id: NodeId) -> Option { 28 | if let QueryNode::Join { 29 | join_type, 30 | left, 31 | right, 32 | conditions, 33 | } = query_graph.node(node_id) 34 | { 35 | // Lifting projection from the LHS 36 | if let QueryNode::Project { 37 | outputs, 38 | input: proj_input, 39 | } = query_graph.node(*left) 40 | { 41 | if let Some((reorder_map, sorted_proj)) = sort_projection(outputs) { 42 | let left_num_columns = num_columns(query_graph, *left); 43 | let right_num_columns = num_columns(query_graph, *right); 44 | let column_map = reorder_map 45 | .iter() 46 | .enumerate() 47 | .map(|(i, e)| (*e, i)) 48 | .chain( 49 | (left_num_columns..left_num_columns + right_num_columns) 50 | .map(|i| (i, i)), 51 | ) 52 | .collect::>(); 53 | let new_conditions = conditions 54 | .iter() 55 | .map(|c| apply_column_map(c, &column_map).unwrap()) 56 | .collect_vec(); 57 | 58 | let final_project = column_map 59 | .iter() 60 | .sorted_by_key(|(_, j)| *j) 61 | .map(|(i, _)| ScalarExpr::input_ref(*i).into()) 62 | .collect_vec(); 63 | 64 | let join_type = *join_type; 65 | let right = *right; 66 | let new_left = query_graph.project(*proj_input, sorted_proj); 67 | let new_join = query_graph.add_node(QueryNode::Join { 68 | join_type, 69 | conditions: new_conditions, 70 | left: new_left, 71 | right, 72 | }); 73 | 74 | return Some(query_graph.project(new_join, final_project)); 75 | } 76 | } 77 | // Lifting projection from the RHS 78 | if let QueryNode::Project { 79 | outputs, 80 | input: proj_input, 81 | } = query_graph.node(*right) 82 | { 83 | if let Some((reorder_map, sorted_proj)) = sort_projection(outputs) { 84 | let left_num_columns = num_columns(query_graph, *left); 85 | let column_map = reorder_map 86 | .iter() 87 | .enumerate() 88 | .map(|(i, e)| (left_num_columns + *e, left_num_columns + i)) 89 | .chain((0..left_num_columns).map(|i| (i, i))) 90 | .collect::>(); 91 | let new_conditions = conditions 92 | .iter() 93 | .map(|c| apply_column_map(c, &column_map).unwrap()) 94 | .collect_vec(); 95 | 96 | let final_project = column_map 97 | .iter() 98 | .sorted_by_key(|(_, j)| *j) 99 | .map(|(i, _)| ScalarExpr::input_ref(*i).into()) 100 | .collect_vec(); 101 | 102 | let join_type = *join_type; 103 | let left = *left; 104 | let new_right = query_graph.project(*proj_input, sorted_proj); 105 | let new_join = query_graph.add_node(QueryNode::Join { 106 | join_type, 107 | conditions: new_conditions, 108 | left, 109 | right: new_right, 110 | }); 111 | 112 | return Some(query_graph.project(new_join, final_project)); 113 | } 114 | } 115 | } 116 | None 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /tests/testdata/explain/project_normalization.test: -------------------------------------------------------------------------------- 1 | run 2 | project_normalization_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 3 7 | - Row Type: string, string, bool 8 | - Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello') 9 | [3] Project [ref_1, ref_2, eq(ref_2, 'hello')] 10 | - Num Columns: 3 11 | - Row Type: string, string, bool 12 | - Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), 
lt(ref_0, 'hello') 13 | [2] Filter [lt(ref_1, 'hello'), eq(ref_2, 'hello')] 14 | - Num Columns: 5 15 | - Row Type: string, string, string, string, string 16 | - Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello') 17 | [1] TableScan id: 1 18 | - Num Columns: 5 19 | - Row Type: string, string, string, string, string 20 | 21 | 22 | Optimized: 23 | [0] QueryRoot 24 | - Num Columns: 3 25 | - Row Type: string, string, bool 26 | - Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1) 27 | [4] Project [ref_1, 'hello', TRUE] 28 | - Num Columns: 3 29 | - Row Type: string, string, bool 30 | - Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1) 31 | [5] Filter [eq(ref_2, 'hello'), lt(ref_1, 'hello')] 32 | - Num Columns: 5 33 | - Row Type: string, string, string, string, string 34 | - Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello') 35 | [1] TableScan id: 1 36 | - Num Columns: 5 37 | - Row Type: string, string, string, string, string 38 | 39 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello')"]},{"id":"3","label":"[3] Project [ref_1, ref_2, eq(ref_2, 'hello')]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello')"]},{"id":"2","label":"[2] Filter [lt(ref_1, 'hello'), eq(ref_2, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 40 | step ProjectNormalizationRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello')"]},{"id":"3","label":"[3] Project [ref_1, ref_2, eq(ref_2, 'hello')]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: ref_2, raw_eq(ref_2, eq(ref_1, 'hello')), lt(ref_0, 'hello')"]},{"id":"2","label":"[2] Filter [lt(ref_1, 'hello'), eq(ref_2, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_1, 'hello', TRUE]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"3","to":"4","label":"ProjectNormalizationRule"}]} 41 | step FilterNormalizationRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]},{"id":"4","label":"[4] Project [ref_1, 'hello', TRUE]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 
'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_1, 'hello'), eq(ref_2, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"5","label":"[5] Filter [eq(ref_2, 'hello'), lt(ref_1, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"2","to":"5","label":"FilterNormalizationRule"}]} 42 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]},{"id":"4","label":"[4] Project [ref_1, 'hello', TRUE]","annotations":["Num Columns: 3","Row Type: string, string, bool","Pulled Up Predicates: raw_eq(ref_1, 'hello'), raw_eq(ref_2, TRUE), lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Filter [eq(ref_2, 'hello'), lt(ref_1, 'hello')]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: eq(ref_2, 'hello'), lt(ref_1, 'hello')"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"}]} 43 | ---- 44 | ---- 45 | -------------------------------------------------------------------------------- /src/query_graph/cloner.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | scalar_expr::ScalarExprRef, 3 | visitor_utils::{PostOrderVisitationResult, PreOrderVisitationResult}, 4 | }; 5 | 6 | use super::{visitor::QueryGraphPrePostVisitorMut, NodeId, QueryGraph, QueryNode}; 7 | 8 | /// Clones a subgraph, except for the pruned subgraphs, applying the given rewrite 9 | /// to the expressions contained by the nodes. 
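A possible usage sketch of deep_clone, based only on the signatures visible in this file and the no-op test at its bottom (the module paths are assumed to be publicly reachable and the plan shape is made up): cloning a projection while shifting every top-level input ref by one is expected to produce a node different from the original, unlike the no-op case.

use rust_sql::query_graph::{cloner::deep_clone, QueryGraph};
use rust_sql::scalar_expr::ScalarExpr;

fn main() {
    let mut query_graph = QueryGraph::new();
    let table_scan_id = query_graph.table_scan(0, 10);
    let project_id = query_graph.project(
        table_scan_id,
        (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(),
    );
    // Clone the whole subgraph (no pruning), shifting each projected input ref by one.
    let cloned_id = deep_clone(&mut query_graph, project_id, &|_, _| false, &mut |e| {
        if let ScalarExpr::InputRef { index } = e.as_ref() {
            ScalarExpr::input_ref(index + 1).into()
        } else {
            e.clone()
        }
    });
    // The rewritten projection should be a different node from the original one.
    assert_ne!(cloned_id, project_id);
}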
10 | pub fn deep_clone( 11 | query_graph: &mut QueryGraph, 12 | subgraph: NodeId, 13 | prune: &P, 14 | rewrite: &mut R, 15 | ) -> NodeId 16 | where 17 | P: Fn(&QueryGraph, NodeId) -> bool, 18 | R: FnMut(&ScalarExprRef) -> ScalarExprRef, 19 | { 20 | let mut cloner = DeepCloner::new(prune, rewrite); 21 | query_graph.visit_subgraph_mut(&mut cloner, subgraph); 22 | cloner.stack.first().cloned().unwrap() 23 | } 24 | 25 | struct DeepCloner<'a, P, R> 26 | where 27 | P: Fn(&QueryGraph, NodeId) -> bool, 28 | R: FnMut(&ScalarExprRef) -> ScalarExprRef, 29 | { 30 | stack: Vec, 31 | prune: &'a P, 32 | rewrite: &'a mut R, 33 | skip_post: bool, 34 | } 35 | 36 | impl<'a, P, R> DeepCloner<'a, P, R> 37 | where 38 | P: Fn(&QueryGraph, NodeId) -> bool, 39 | R: FnMut(&ScalarExprRef) -> ScalarExprRef, 40 | { 41 | fn new(prune: &'a P, rewrite: &'a mut R) -> Self { 42 | Self { 43 | stack: Vec::new(), 44 | prune, 45 | rewrite, 46 | skip_post: false, 47 | } 48 | } 49 | 50 | fn clone_with_new_inputs(&mut self, query_graph: &mut QueryGraph, node_id: NodeId) -> NodeId { 51 | let mut cloned_node = query_graph.node(node_id).clone(); 52 | let num_inputs = cloned_node.num_inputs(); 53 | let inputs = &self.stack[self.stack.len() - num_inputs..]; 54 | match &mut cloned_node { 55 | QueryNode::Project { outputs, input } => { 56 | outputs.iter_mut().for_each(|e| *e = (self.rewrite)(&e)); 57 | *input = inputs[0]; 58 | } 59 | QueryNode::Filter { conditions, input } => { 60 | conditions.iter_mut().for_each(|e| *e = (self.rewrite)(e)); 61 | *input = inputs[0]; 62 | } 63 | QueryNode::TableScan { 64 | table_id: _, 65 | row_type: _, 66 | } => {} 67 | QueryNode::Join { 68 | join_type: _, 69 | conditions, 70 | left, 71 | right, 72 | } => { 73 | conditions.iter_mut().for_each(|e| *e = (self.rewrite)(e)); 74 | *left = inputs[0]; 75 | *right = inputs[1]; 76 | } 77 | QueryNode::Aggregate { 78 | group_key: _, 79 | aggregates: _, 80 | input, 81 | } => *input = inputs[0], 82 | QueryNode::Union { inputs: inputs_ref } => *inputs_ref = inputs.to_vec(), 83 | QueryNode::Apply { 84 | correlation: _, 85 | left, 86 | right, 87 | apply_type: _, 88 | } => { 89 | *left = inputs[0]; 90 | *right = inputs[1]; 91 | } 92 | QueryNode::QueryRoot { .. } | QueryNode::SubqueryRoot { .. 
} => { 93 | panic!("Root nodes cannot be cloned") 94 | } 95 | } 96 | self.stack.truncate(self.stack.len() - num_inputs); 97 | query_graph.add_node(cloned_node) 98 | } 99 | } 100 | 101 | impl<'a, P, R> QueryGraphPrePostVisitorMut for DeepCloner<'a, P, R> 102 | where 103 | P: Fn(&QueryGraph, NodeId) -> bool, 104 | R: FnMut(&ScalarExprRef) -> ScalarExprRef, 105 | { 106 | fn visit_pre( 107 | &mut self, 108 | query_graph: &mut QueryGraph, 109 | node_id: &mut NodeId, 110 | ) -> PreOrderVisitationResult { 111 | if (self.prune)(query_graph, *node_id) { 112 | self.skip_post = true; 113 | self.stack.push(*node_id); 114 | PreOrderVisitationResult::DoNotVisitInputs 115 | } else { 116 | PreOrderVisitationResult::VisitInputs 117 | } 118 | } 119 | 120 | fn visit_post( 121 | &mut self, 122 | query_graph: &mut QueryGraph, 123 | node_id: &mut NodeId, 124 | ) -> PostOrderVisitationResult { 125 | if self.skip_post { 126 | self.skip_post = false; 127 | } else { 128 | let cloned_node = self.clone_with_new_inputs(query_graph, *node_id); 129 | self.stack.push(cloned_node); 130 | } 131 | PostOrderVisitationResult::Continue 132 | } 133 | } 134 | 135 | #[cfg(test)] 136 | mod tests { 137 | 138 | use crate::{ 139 | query_graph::QueryGraph, 140 | scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef}, 141 | }; 142 | 143 | use super::deep_clone; 144 | 145 | /// Test that if no expression is rewritten the same node is returned. 146 | #[test] 147 | fn test_no_op() { 148 | let mut query_graph = QueryGraph::new(); 149 | let table_scan_id = query_graph.table_scan(0, 10); 150 | let filter_1: ScalarExprRef = ScalarExpr::input_ref(0) 151 | .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into()) 152 | .into(); 153 | let filter_id = query_graph.filter(table_scan_id, vec![filter_1.clone()]); 154 | let project_id = query_graph.project( 155 | filter_id, 156 | (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(), 157 | ); 158 | query_graph.set_entry_node(project_id); 159 | 160 | let cloned_project_id = deep_clone(&mut query_graph, project_id, &|_, _| false, &mut |e| { 161 | e.clone() 162 | }); 163 | assert_eq!(cloned_project_id, project_id); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /tests/testdata/explain/filter_project_transpose.test: -------------------------------------------------------------------------------- 1 | run 2 | filter_project_transpose_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 3 7 | - Row Type: string, string, string 8 | - Pulled Up Predicates: gt(ref_2, ref_1) 9 | [3] Filter [gt(ref_2, ref_1)] 10 | - Num Columns: 3 11 | - Row Type: string, string, string 12 | - Pulled Up Predicates: gt(ref_2, ref_1) 13 | [2] Project [ref_4, ref_2, ref_3] 14 | - Num Columns: 3 15 | - Row Type: string, string, string 16 | [1] TableScan id: 0 17 | - Num Columns: 5 18 | - Row Type: string, string, string, string, string 19 | 20 | 21 | Optimized: 22 | [0] QueryRoot 23 | - Num Columns: 3 24 | - Row Type: string, string, string 25 | - Pulled Up Predicates: gt(ref_2, ref_1) 26 | [6] Project [ref_4, ref_2, ref_3] 27 | - Num Columns: 3 28 | - Row Type: string, string, string 29 | - Pulled Up Predicates: gt(ref_2, ref_1) 30 | [5] Filter [gt(ref_3, ref_2)] 31 | - Num Columns: 5 32 | - Row Type: string, string, string, string, string 33 | - Pulled Up Predicates: gt(ref_3, ref_2) 34 | [1] TableScan id: 0 35 | - Num Columns: 5 36 | - Row Type: string, string, string, string, string 37 | 38 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 
3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"3","label":"[3] Filter [gt(ref_2, ref_1)]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"2","label":"[2] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 39 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"3","label":"[3] Filter [gt(ref_2, ref_1)]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"2","label":"[2] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"4","label":"TopProjectionRule"}]} 40 | step FilterProjectTransposeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"3","label":"[3] Filter [gt(ref_2, ref_1)]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"2","label":"[2] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"6","label":"[6] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"5","label":"[5] Filter [gt(ref_3, ref_2)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: gt(ref_3, ref_2)"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"3","to":"6","label":"FilterProjectTransposeRule"}]} 41 | step ProjectMergeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"6","label":"[6] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"5","label":"[5] Filter 
[gt(ref_3, ref_2)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: gt(ref_3, ref_2)"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"4","to":"6","label":"ProjectMergeRule"}]} 42 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"6","label":"[6] Project [ref_4, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string","Pulled Up Predicates: gt(ref_2, ref_1)"]},{"id":"5","label":"[5] Filter [gt(ref_3, ref_2)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: gt(ref_3, ref_2)"]},{"id":"1","label":"[1] TableScan id: 0","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"}]} 43 | ---- 44 | ---- 45 | -------------------------------------------------------------------------------- /tests/testdata/explain/cte_discovery.test: -------------------------------------------------------------------------------- 1 | run rules=(RemovePassthroughProjectRule,CteDiscoveryRule) 2 | cte_discovery_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 5 7 | - Row Type: string, string, string, string, string 8 | - Pulled Up Predicates: lt(ref_0, ref_1) 9 | [5] Union 10 | - Num Columns: 5 11 | - Row Type: string, string, string, string, string 12 | - Pulled Up Predicates: lt(ref_0, ref_1) 13 | [2] Filter [lt(ref_0, ref_1)] 14 | - Num Columns: 5 15 | - Row Type: string, string, string, string, string 16 | - Pulled Up Predicates: lt(ref_0, ref_1) 17 | [1] TableScan id: 1 18 | - Num Columns: 5 19 | - Row Type: string, string, string, string, string 20 | [4] Filter [lt(ref_0, ref_1)] 21 | - Num Columns: 5 22 | - Row Type: string, string, string, string, string 23 | - Pulled Up Predicates: lt(ref_0, ref_1) 24 | [3] Project [ref_0, ref_1, ref_2, ref_3, ref_4] 25 | - Num Columns: 5 26 | - Row Type: string, string, string, string, string 27 | Recurring node 1 28 | 29 | 30 | Optimized: 31 | [0] QueryRoot 32 | - Num Columns: 5 33 | - Row Type: string, string, string, string, string 34 | - Pulled Up Predicates: lt(ref_0, ref_1) 35 | [5] Union 36 | - Num Columns: 5 37 | - Row Type: string, string, string, string, string 38 | - Pulled Up Predicates: lt(ref_0, ref_1) 39 | [2] Filter [lt(ref_0, ref_1)] 40 | - Num Columns: 5 41 | - Row Type: string, string, string, string, string 42 | - Pulled Up Predicates: lt(ref_0, ref_1) 43 | [1] TableScan id: 1 44 | - Num Columns: 5 45 | - Row Type: string, string, string, string, string 46 | Recurring node 2 47 | 48 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, 
ref_1)"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"1","label":"input 0"}]} 49 | step RemovePassthroughProjectRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"1","label":"input 0"},{"from":"3","to":"1","label":"RemovePassthroughProjectRule"}]} 50 | step CteDiscoveryRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"1","label":"input 0"},{"from":"4","to":"2","label":"CteDiscoveryRule"}]} 51 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"2","label":"[2] Filter [lt(ref_0, ref_1)]","annotations":["Num Columns: 5","Row Type: string, 
string, string, string, string","Pulled Up Predicates: lt(ref_0, ref_1)"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"5","to":"2","label":"input 1"},{"from":"2","to":"1","label":"input 0"}]} 52 | ---- 53 | ---- 54 | -------------------------------------------------------------------------------- /src/query_graph/properties/num_columns.rs: -------------------------------------------------------------------------------- 1 | use std::any::TypeId; 2 | 3 | use crate::{ 4 | query_graph::{visitor::QueryGraphPrePostVisitor, *}, 5 | visitor_utils::PreOrderVisitationResult, 6 | }; 7 | 8 | /// Returns the number of columns the given node projects, caching the result in the 9 | /// `QueryGraph` metadata. 10 | // TODO(asenac) this will eventually be renamed as `row_type`, returning the data types 11 | // of the columns projected by the given node. 12 | pub fn num_columns(query_graph: &QueryGraph, node_id: NodeId) -> usize { 13 | NumColumns::num_columns(query_graph, node_id) 14 | } 15 | 16 | /// Helper function to include column information when explaining the plan. 17 | pub fn num_columns_annotator(query_graph: &QueryGraph, node_id: NodeId) -> Option { 18 | let num_columns = num_columns(query_graph, node_id); 19 | Some(format!("Num Columns: {}", num_columns,)) 20 | } 21 | 22 | struct NumColumns {} 23 | 24 | impl NumColumns { 25 | fn num_columns(query_graph: &QueryGraph, node_id: NodeId) -> usize { 26 | let mut visitor = NumColumns {}; 27 | query_graph.visit_subgraph(&mut visitor, node_id); 28 | visitor.num_columns_unchecked(query_graph, node_id) 29 | } 30 | 31 | fn num_columns_unchecked(&self, query_graph: &QueryGraph, node_id: NodeId) -> usize { 32 | query_graph 33 | .property_cache 34 | .borrow_mut() 35 | .node_bottom_up_properties(node_id) 36 | .get(&Self::metadata_type_id()) 37 | .unwrap() 38 | .downcast_ref::() 39 | .unwrap() 40 | .clone() 41 | } 42 | 43 | fn metadata_type_id() -> TypeId { 44 | TypeId::of::() 45 | } 46 | 47 | fn compute_num_columns_for_node(&self, query_graph: &QueryGraph, node_id: NodeId) -> usize { 48 | match query_graph.node(node_id) { 49 | QueryNode::QueryRoot { input } => { 50 | if let Some(input) = input { 51 | self.num_columns_unchecked(query_graph, *input) 52 | } else { 53 | 0 54 | } 55 | } 56 | QueryNode::Project { outputs, .. } => outputs.len(), 57 | QueryNode::Filter { input, .. } | QueryNode::SubqueryRoot { input } => { 58 | self.num_columns_unchecked(query_graph, *input) 59 | } 60 | QueryNode::TableScan { row_type, .. } => row_type.len(), 61 | QueryNode::Join { 62 | join_type, 63 | left, 64 | right, 65 | .. 66 | } => { 67 | let left_columns = if join_type.projects_columns_from_left() { 68 | self.num_columns_unchecked(query_graph, *left) 69 | } else { 70 | 0 71 | }; 72 | let right_columns = if join_type.projects_columns_from_right() { 73 | self.num_columns_unchecked(query_graph, *right) 74 | } else { 75 | 0 76 | }; 77 | left_columns + right_columns 78 | } 79 | QueryNode::Aggregate { 80 | group_key, 81 | aggregates, 82 | .. 83 | } => group_key.len() + aggregates.len(), 84 | QueryNode::Union { inputs } => { 85 | if inputs.is_empty() { 86 | 0 87 | } else { 88 | self.num_columns_unchecked(query_graph, inputs[0]) 89 | } 90 | } 91 | QueryNode::Apply { left, right, .. 
} => { 92 | let left_columns = self.num_columns_unchecked(query_graph, *left); 93 | let right_columns = self.num_columns_unchecked(query_graph, *right); 94 | left_columns + right_columns 95 | } 96 | } 97 | } 98 | } 99 | 100 | impl QueryGraphPrePostVisitor for NumColumns { 101 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 102 | if query_graph 103 | .property_cache 104 | .borrow_mut() 105 | .node_bottom_up_properties(node_id) 106 | .contains_key(&Self::metadata_type_id()) 107 | { 108 | PreOrderVisitationResult::DoNotVisitInputs 109 | } else { 110 | PreOrderVisitationResult::VisitInputs 111 | } 112 | } 113 | 114 | fn visit_post(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 115 | if !query_graph 116 | .property_cache 117 | .borrow_mut() 118 | .node_bottom_up_properties(node_id) 119 | .contains_key(&Self::metadata_type_id()) 120 | { 121 | let num_columns = self.compute_num_columns_for_node(query_graph, node_id); 122 | query_graph 123 | .property_cache 124 | .borrow_mut() 125 | .node_bottom_up_properties(node_id) 126 | .insert(Self::metadata_type_id(), Box::new(num_columns)); 127 | } 128 | } 129 | } 130 | 131 | #[cfg(test)] 132 | mod tests { 133 | use crate::{ 134 | query_graph::QueryGraph, 135 | scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef}, 136 | }; 137 | 138 | use super::num_columns; 139 | 140 | #[test] 141 | fn test_num_columns() { 142 | let mut query_graph = QueryGraph::new(); 143 | let table_scan_id = query_graph.table_scan(0, 10); 144 | let filter_1: ScalarExprRef = ScalarExpr::input_ref(0) 145 | .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into()) 146 | .into(); 147 | let filter_id = query_graph.filter(table_scan_id, vec![filter_1.clone()]); 148 | let project_id = query_graph.project( 149 | filter_id, 150 | (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(), 151 | ); 152 | 153 | assert_eq!(num_columns(&query_graph, table_scan_id), 10); 154 | assert_eq!(num_columns(&query_graph, filter_id), 10); 155 | assert_eq!(num_columns(&query_graph, project_id), 5); 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/query_graph/properties/row_type.rs: -------------------------------------------------------------------------------- 1 | use std::{any::TypeId, rc::Rc}; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | data_type::DataType, 7 | query_graph::{visitor::QueryGraphPrePostVisitor, *}, 8 | visitor_utils::PreOrderVisitationResult, 9 | }; 10 | 11 | /// Returns the row type of the given node. 12 | pub fn row_type(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 13 | RowType::row_type(query_graph, node_id) 14 | } 15 | 16 | /// Given a join node returns the row type of the cross product of its operands. 17 | /// This is the row type the expressions in the join refer to. 18 | pub fn cross_product_row_type(query_graph: &QueryGraph, node_id: NodeId) -> Option> { 19 | if let QueryNode::Join { 20 | join_type: _, 21 | conditions: _, 22 | left, 23 | right, 24 | } = query_graph.node(node_id) 25 | { 26 | Some( 27 | row_type(query_graph, *left) 28 | .iter() 29 | .chain(row_type(query_graph, *right).iter()) 30 | .cloned() 31 | .collect_vec(), 32 | ) 33 | } else { 34 | None 35 | } 36 | } 37 | 38 | /// Helper function to include row type information when explaining the plan. 
39 | pub fn row_type_annotator(query_graph: &QueryGraph, node_id: NodeId) -> Option { 40 | let row_type = row_type(query_graph, node_id); 41 | Some(format!( 42 | "Row Type: {}", 43 | row_type 44 | .iter() 45 | .map(|data_type| format!("{}", data_type)) 46 | .collect::>() 47 | .join(", "), 48 | )) 49 | } 50 | 51 | struct RowType {} 52 | 53 | impl RowType { 54 | fn row_type(query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 55 | let mut visitor = RowType {}; 56 | query_graph.visit_subgraph(&mut visitor, node_id); 57 | visitor.row_type_unchecked(query_graph, node_id) 58 | } 59 | 60 | fn row_type_unchecked(&self, query_graph: &QueryGraph, node_id: NodeId) -> Rc> { 61 | query_graph 62 | .property_cache 63 | .borrow_mut() 64 | .node_bottom_up_properties(node_id) 65 | .get(&Self::metadata_type_id()) 66 | .unwrap() 67 | .downcast_ref::>>() 68 | .unwrap() 69 | .clone() 70 | } 71 | 72 | fn metadata_type_id() -> TypeId { 73 | TypeId::of::() 74 | } 75 | 76 | fn compute_row_type_for_node( 77 | &self, 78 | query_graph: &QueryGraph, 79 | node_id: NodeId, 80 | ) -> Rc> { 81 | match query_graph.node(node_id) { 82 | QueryNode::QueryRoot { input } => { 83 | if let Some(input) = input { 84 | self.row_type_unchecked(query_graph, *input) 85 | } else { 86 | Default::default() 87 | } 88 | } 89 | QueryNode::Project { outputs, input } => { 90 | let input_row_type = self.row_type_unchecked(query_graph, *input); 91 | outputs 92 | .iter() 93 | .map(|e| e.data_type(query_graph, &input_row_type[..])) 94 | .collect_vec() 95 | .into() 96 | } 97 | QueryNode::Filter { input, .. } | QueryNode::SubqueryRoot { input } => { 98 | self.row_type_unchecked(query_graph, *input) 99 | } 100 | QueryNode::TableScan { row_type, .. } => row_type.clone(), 101 | QueryNode::Join { 102 | join_type, 103 | left, 104 | right, 105 | .. 106 | } => match join_type { 107 | JoinType::Inner 108 | | JoinType::LeftOuter 109 | | JoinType::RightOuter 110 | | JoinType::FullOuter => self 111 | .row_type_unchecked(query_graph, *left) 112 | .iter() 113 | .chain(self.row_type_unchecked(query_graph, *right).iter()) 114 | .cloned() 115 | .collect_vec() 116 | .into(), 117 | JoinType::Semi | JoinType::Anti => self.row_type_unchecked(query_graph, *left), 118 | }, 119 | QueryNode::Aggregate { 120 | group_key, 121 | aggregates, 122 | input, 123 | } => { 124 | let input_row_type = self.row_type_unchecked(query_graph, *input); 125 | group_key 126 | .iter() 127 | .map(|e| input_row_type[*e].clone()) 128 | .chain(aggregates.iter().map(|agg| agg.data_type(&*input_row_type))) 129 | .collect_vec() 130 | .into() 131 | } 132 | QueryNode::Union { inputs } => { 133 | if inputs.is_empty() { 134 | Default::default() 135 | } else { 136 | self.row_type_unchecked(query_graph, inputs[0]) 137 | } 138 | } 139 | QueryNode::Apply { left, right, .. 
} => self 140 | .row_type_unchecked(query_graph, *left) 141 | .iter() 142 | .chain(self.row_type_unchecked(query_graph, *right).iter()) 143 | .cloned() 144 | .collect_vec() 145 | .into(), 146 | } 147 | } 148 | } 149 | 150 | impl QueryGraphPrePostVisitor for RowType { 151 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 152 | if query_graph 153 | .property_cache 154 | .borrow_mut() 155 | .node_bottom_up_properties(node_id) 156 | .contains_key(&Self::metadata_type_id()) 157 | { 158 | PreOrderVisitationResult::DoNotVisitInputs 159 | } else { 160 | PreOrderVisitationResult::VisitInputs 161 | } 162 | } 163 | 164 | fn visit_post(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 165 | if !query_graph 166 | .property_cache 167 | .borrow_mut() 168 | .node_bottom_up_properties(node_id) 169 | .contains_key(&Self::metadata_type_id()) 170 | { 171 | let row_type = self.compute_row_type_for_node(query_graph, node_id); 172 | query_graph 173 | .property_cache 174 | .borrow_mut() 175 | .node_bottom_up_properties(node_id) 176 | .insert(Self::metadata_type_id(), Box::new(row_type)); 177 | } 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /tests/testdata/explain/keys_filter.test: -------------------------------------------------------------------------------- 1 | run 2 | filter_keys_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 5 7 | - Row Type: string, string, string, string, string 8 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 9 | [2] Filter [FALSE] 10 | - Num Columns: 5 11 | - Row Type: string, string, string, string, string 12 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 13 | [1] TableScan id: 1 14 | - Num Columns: 5 15 | - Row Type: string, string, string, string, string 16 | 17 | 18 | Optimized: 19 | [0] QueryRoot 20 | - Num Columns: 5 21 | - Row Type: string, string, string, string, string 22 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 23 | [3] Project [ref_0, ref_1, ref_2, ref_3, ref_4] 24 | - Num Columns: 5 25 | - Row Type: string, string, string, string, string 26 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 27 | [2] Filter [FALSE] 28 | - Num Columns: 5 29 | - Row Type: string, string, string, string, string 30 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 31 | [1] TableScan id: 1 32 | - Num Columns: 5 33 | - Row Type: string, string, string, string, string 34 | 35 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [FALSE]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 36 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [FALSE]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, 
string"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]}],"edges":[{"from":"0","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"3","label":"TopProjectionRule"}]} 37 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [FALSE]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 38 | ---- 39 | ---- 40 | 41 | run 42 | filter_keys_2 43 | ---- 44 | ---- 45 | [0] QueryRoot 46 | - Num Columns: 5 47 | - Row Type: string, string, string, string, string 48 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 49 | [2] Filter [NULL] 50 | - Num Columns: 5 51 | - Row Type: string, string, string, string, string 52 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 53 | [1] TableScan id: 1 54 | - Num Columns: 5 55 | - Row Type: string, string, string, string, string 56 | 57 | 58 | Optimized: 59 | [0] QueryRoot 60 | - Num Columns: 5 61 | - Row Type: string, string, string, string, string 62 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 63 | [3] Project [ref_0, ref_1, ref_2, ref_3, ref_4] 64 | - Num Columns: 5 65 | - Row Type: string, string, string, string, string 66 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 67 | [2] Filter [NULL] 68 | - Num Columns: 5 69 | - Row Type: string, string, string, string, string 70 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 71 | [1] TableScan id: 1 72 | - Num Columns: 5 73 | - Row Type: string, string, string, string, string 74 | 75 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [NULL]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 76 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [NULL]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, 
string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]}],"edges":[{"from":"0","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"3","label":"TopProjectionRule"}]} 77 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"3","label":"[3] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Filter [NULL]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 78 | ---- 79 | ---- 80 | -------------------------------------------------------------------------------- /src/query_graph/json.rs: -------------------------------------------------------------------------------- 1 | //! JSON serializer for generating visual representations of the plans. 2 | use std::collections::VecDeque; 3 | 4 | use crate::{ 5 | query_graph::{explain::explain_scalar_expr_vec, *}, 6 | scalar_expr::ScalarExpr, 7 | visitor_utils::PreOrderVisitationResult, 8 | }; 9 | 10 | use super::{ 11 | properties::{default_annotators, subqueries}, 12 | visitor::QueryGraphPrePostVisitor, 13 | }; 14 | 15 | pub struct JsonSerializer<'a> { 16 | annotators: Vec<&'a dyn Fn(&QueryGraph, NodeId) -> Option>, 17 | included_nodes: HashSet, 18 | graph: Graph, 19 | queue: VecDeque, 20 | } 21 | 22 | impl<'a> JsonSerializer<'a> { 23 | pub fn new(annotators: Vec<&'a dyn Fn(&QueryGraph, NodeId) -> Option>) -> Self { 24 | Self { 25 | annotators, 26 | included_nodes: HashSet::new(), 27 | graph: Graph::new(), 28 | queue: VecDeque::new(), 29 | } 30 | } 31 | 32 | pub fn new_with_all_annotators() -> Self { 33 | Self::new(default_annotators()) 34 | } 35 | 36 | /// Ensure the given subgraph is included in the output graph. 37 | pub fn add_subgraph(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 38 | self.queue.push_back(node_id); 39 | while let Some(node_id) = self.queue.pop_front() { 40 | query_graph.visit_subgraph(self, node_id); 41 | } 42 | } 43 | 44 | pub fn add_node_replacement( 45 | &mut self, 46 | query_graph: &QueryGraph, 47 | original_node: NodeId, 48 | replacement_node: NodeId, 49 | label: String, 50 | ) { 51 | query_graph.visit_subgraph(self, original_node); 52 | query_graph.visit_subgraph(self, replacement_node); 53 | self.graph.edges.push(Edge { 54 | from: original_node.to_string(), 55 | to: replacement_node.to_string(), 56 | label, 57 | }) 58 | } 59 | 60 | /// Finally, generate the JSON string. 61 | pub fn serialize(&self) -> Result { 62 | serde_json::to_string(&self.graph) 63 | } 64 | } 65 | 66 | impl<'a> QueryGraphPrePostVisitor for JsonSerializer<'a> { 67 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 68 | if !self.included_nodes.insert(node_id) { 69 | return PreOrderVisitationResult::DoNotVisitInputs; 70 | } 71 | let prefix = format!("[{}] ", node_id); 72 | let label = match query_graph.node(node_id) { 73 | QueryNode::QueryRoot { .. 
} => { 74 | format!("{}QueryRoot", prefix) 75 | } 76 | QueryNode::Project { outputs, .. } => { 77 | format!("{}Project [{}]", prefix, explain_scalar_expr_vec(outputs)) 78 | } 79 | QueryNode::Filter { conditions, .. } => { 80 | format!("{}Filter [{}]", prefix, explain_scalar_expr_vec(conditions),) 81 | } 82 | QueryNode::TableScan { table_id, .. } => { 83 | format!("{}TableScan id: {}", prefix, table_id) 84 | } 85 | QueryNode::Join { 86 | join_type, 87 | conditions, 88 | .. 89 | } => { 90 | format!( 91 | "{}{} Join [{}]", 92 | prefix, 93 | join_type, 94 | explain_scalar_expr_vec(conditions) 95 | ) 96 | } 97 | QueryNode::Aggregate { 98 | group_key, 99 | aggregates, 100 | .. 101 | } => format!( 102 | "{}Aggregate key: [{}], aggregates: [{}]", 103 | prefix, 104 | group_key 105 | .iter() 106 | .map(|e| format!("{}", ScalarExpr::input_ref(*e))) 107 | .collect::>() 108 | .join(", "), 109 | aggregates 110 | .iter() 111 | .map(|e| format!("{}", e)) 112 | .collect::>() 113 | .join(", "), 114 | ), 115 | QueryNode::Union { .. } => format!("{}Union", prefix), 116 | QueryNode::SubqueryRoot { .. } => format!("{}SubqueryRoot", prefix), 117 | QueryNode::Apply { 118 | correlation, 119 | apply_type, 120 | .. 121 | } => { 122 | format!( 123 | "{}{} Apply parameters: [{}]", 124 | prefix, 125 | apply_type, 126 | explain_scalar_expr_vec(&correlation.parameters), 127 | ) 128 | } 129 | }; 130 | let mut annotations = Vec::new(); 131 | for annotator in self.annotators.iter() { 132 | if let Some(annotation) = (annotator)(query_graph, node_id) { 133 | annotations.push(annotation); 134 | } 135 | } 136 | self.graph.nodes.push(Node { 137 | id: node_id.to_string(), 138 | label: label, 139 | annotations, 140 | }); 141 | let node = query_graph.node(node_id); 142 | for i in 0..node.num_inputs() { 143 | let to = node.get_input(i); 144 | self.graph.edges.push(Edge { 145 | from: node_id.to_string(), 146 | to: to.to_string(), 147 | label: format!("input {}", i), 148 | }); 149 | } 150 | 151 | // Link the current node with the subqueries it references 152 | let subqueries = subqueries(query_graph, node_id); 153 | for subquery_root in subqueries.iter() { 154 | self.queue.push_back(*subquery_root); 155 | self.graph.edges.push(Edge { 156 | from: node_id.to_string(), 157 | to: subquery_root.to_string(), 158 | label: format!("subquery({})", subquery_root), 159 | }); 160 | } 161 | return PreOrderVisitationResult::VisitInputs; 162 | } 163 | 164 | fn visit_post(&mut self, _: &QueryGraph, _: NodeId) {} 165 | } 166 | 167 | #[derive(Serialize, Deserialize)] 168 | pub struct Node { 169 | id: String, 170 | label: String, 171 | annotations: Vec, 172 | } 173 | 174 | #[derive(Serialize, Deserialize)] 175 | pub struct Edge { 176 | from: String, 177 | to: String, 178 | label: String, 179 | } 180 | 181 | #[derive(Serialize, Deserialize)] 182 | pub struct Graph { 183 | nodes: Vec, 184 | edges: Vec, 185 | } 186 | 187 | impl Graph { 188 | fn new() -> Self { 189 | Self { 190 | nodes: Vec::new(), 191 | edges: Vec::new(), 192 | } 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /tests/testdata/explain/union_pruning.test: -------------------------------------------------------------------------------- 1 | run 2 | union_pruning 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 2 7 | - Row Type: string, string 8 | [5] Union 9 | - Num Columns: 2 10 | - Row Type: string, string 11 | [3] Project [ref_0, ref_2] 12 | - Num Columns: 2 13 | - Row Type: string, string 14 | [2] Union 15 | - Num Columns: 10 16 
| - Row Type: string, string, string, string, string, string, string, string, string, string 17 | [1] TableScan id: 1 18 | - Num Columns: 10 19 | - Row Type: string, string, string, string, string, string, string, string, string, string 20 | Recurring node 1 21 | [4] Project [ref_3, ref_2] 22 | - Num Columns: 2 23 | - Row Type: string, string 24 | Recurring node 2 25 | 26 | 27 | Optimized: 28 | [0] QueryRoot 29 | - Num Columns: 2 30 | - Row Type: string, string 31 | [6] Project [ref_0, ref_1] 32 | - Num Columns: 2 33 | - Row Type: string, string 34 | [5] Union 35 | - Num Columns: 2 36 | - Row Type: string, string 37 | [9] Project [ref_0, ref_1] 38 | - Num Columns: 2 39 | - Row Type: string, string 40 | [8] Union 41 | - Num Columns: 3 42 | - Row Type: string, string, string 43 | [7] Project [ref_0, ref_2, ref_3] 44 | - Num Columns: 3 45 | - Row Type: string, string, string 46 | [1] TableScan id: 1 47 | - Num Columns: 10 48 | - Row Type: string, string, string, string, string, string, string, string, string, string 49 | Recurring node 7 50 | [10] Project [ref_2, ref_1] 51 | - Num Columns: 2 52 | - Row Type: string, string 53 | Recurring node 8 54 | 55 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"3","label":"[3] Project [ref_0, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_3, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"3","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"4","to":"2","label":"input 0"}]} 56 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"3","label":"[3] Project [ref_0, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_3, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"6","label":"[6] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"3","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"4","to":"2","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"6","label":"TopProjectionRule"}]} 57 | step UnionPruningRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num 
Columns: 2","Row Type: string, string"]},{"id":"6","label":"[6] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"3","label":"[3] Project [ref_0, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_3, ref_2]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"9","label":"[9] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"8","label":"[8] Union","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"7","label":"[7] Project [ref_0, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"10","label":"[10] Project [ref_2, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]}],"edges":[{"from":"0","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"3","label":"input 0"},{"from":"5","to":"4","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"4","to":"2","label":"input 0"},{"from":"9","to":"8","label":"input 0"},{"from":"8","to":"7","label":"input 0"},{"from":"8","to":"7","label":"input 1"},{"from":"7","to":"1","label":"input 0"},{"from":"3","to":"9","label":"UnionPruningRule"},{"from":"10","to":"8","label":"input 0"},{"from":"4","to":"10","label":"UnionPruningRule"}]} 58 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"6","label":"[6] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"5","label":"[5] Union","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"9","label":"[9] Project [ref_0, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]},{"id":"8","label":"[8] Union","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"7","label":"[7] Project [ref_0, ref_2, ref_3]","annotations":["Num Columns: 3","Row Type: string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"10","label":"[10] Project [ref_2, ref_1]","annotations":["Num Columns: 2","Row Type: string, string"]}],"edges":[{"from":"0","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"9","label":"input 0"},{"from":"5","to":"10","label":"input 1"},{"from":"9","to":"8","label":"input 0"},{"from":"8","to":"7","label":"input 0"},{"from":"8","to":"7","label":"input 1"},{"from":"7","to":"1","label":"input 0"},{"from":"10","to":"8","label":"input 0"}]} 59 | ---- 60 | ---- 61 | -------------------------------------------------------------------------------- /tests/testdata/explain/union_merge.test: -------------------------------------------------------------------------------- 1 | run 2 | union_merge 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 10 7 | - Row Type: string, string, string, string, string, string, string, string, string, string 8 | 
[4] Union 9 | - Num Columns: 10 10 | - Row Type: string, string, string, string, string, string, string, string, string, string 11 | [2] Union 12 | - Num Columns: 10 13 | - Row Type: string, string, string, string, string, string, string, string, string, string 14 | [1] TableScan id: 1 15 | - Num Columns: 10 16 | - Row Type: string, string, string, string, string, string, string, string, string, string 17 | Recurring node 1 18 | [3] Union 19 | - Num Columns: 10 20 | - Row Type: string, string, string, string, string, string, string, string, string, string 21 | Recurring node 2 22 | Recurring node 2 23 | Recurring node 1 24 | 25 | 26 | Optimized: 27 | [0] QueryRoot 28 | - Num Columns: 10 29 | - Row Type: string, string, string, string, string, string, string, string, string, string 30 | [5] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9] 31 | - Num Columns: 10 32 | - Row Type: string, string, string, string, string, string, string, string, string, string 33 | [6] Union 34 | - Num Columns: 10 35 | - Row Type: string, string, string, string, string, string, string, string, string, string 36 | [1] TableScan id: 1 37 | - Num Columns: 10 38 | - Row Type: string, string, string, string, string, string, string, string, string, string 39 | Recurring node 1 40 | Recurring node 1 41 | Recurring node 1 42 | Recurring node 1 43 | Recurring node 1 44 | Recurring node 1 45 | 46 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"3","label":"[3] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"4","to":"3","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"3","to":"2","label":"input 1"},{"from":"3","to":"1","label":"input 2"}]} 47 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"3","label":"[3] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, 
string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"4","to":"3","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"3","to":"2","label":"input 1"},{"from":"3","to":"1","label":"input 2"},{"from":"5","to":"4","label":"input 0"},{"from":"4","to":"5","label":"TopProjectionRule"}]} 48 | step UnionMergeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"4","label":"[4] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"2","label":"[2] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"3","label":"[3] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"6","label":"[6] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"4","label":"input 0"},{"from":"4","to":"2","label":"input 0"},{"from":"4","to":"3","label":"input 1"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"3","to":"2","label":"input 0"},{"from":"3","to":"2","label":"input 1"},{"from":"3","to":"1","label":"input 2"},{"from":"6","to":"1","label":"input 0"},{"from":"6","to":"1","label":"input 1"},{"from":"6","to":"1","label":"input 2"},{"from":"6","to":"1","label":"input 3"},{"from":"6","to":"1","label":"input 4"},{"from":"6","to":"1","label":"input 5"},{"from":"6","to":"1","label":"input 6"},{"from":"4","to":"6","label":"UnionMergeRule"}]} 49 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"6","label":"[6] Union","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"5","label":"input 0"},{"from":"5","to":"6","label":"input 0"},{"from":"6","to":"1","label":"input 0"},{"from":"6","to":"1","label":"input 1"},{"from":"6","to":"1","label":"input 2"},{"from":"6","to":"1","label":"input 3"},{"from":"6","to":"1","label":"input 4"},{"from":"6","to":"1","label":"input 5"},{"from":"6","to":"1","label":"input 6"}]} 50 | ---- 51 | ---- 52 | 
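
Editor's note: the `initial`, `step` and `final` lines recorded in the explain *.test files in this listing are the output of the `JsonSerializer` defined in src/query_graph/json.rs earlier in this dump, which its module comment describes as a JSON serializer for generating visual representations of the plans. The following is a small, hypothetical usage sketch, not part of the repository: it assumes the library is consumed as the `rust_sql` crate and that module paths mirror the source layout, and it only relies on builder and serializer calls that appear elsewhere in this listing (`QueryGraph::new`, `table_scan`, `filter`, `project`, `ScalarExpr::input_ref`, `JsonSerializer::new_with_all_annotators`, `add_subgraph`, `serialize`).

use rust_sql::query_graph::{json::JsonSerializer, QueryGraph};
use rust_sql::scalar_expr::{BinaryOp, ScalarExpr, ScalarExprRef};

fn main() {
    // Build a tiny plan: Project (5 cols) <- Filter (ref_0 = ref_1) <- TableScan (10 cols),
    // mirroring the graph constructed in the num_columns unit test above.
    let mut query_graph = QueryGraph::new();
    let table_scan_id = query_graph.table_scan(0, 10);
    let condition: ScalarExprRef = ScalarExpr::input_ref(0)
        .binary(BinaryOp::Eq, ScalarExpr::input_ref(1).into())
        .into();
    let filter_id = query_graph.filter(table_scan_id, vec![condition]);
    let project_id = query_graph.project(
        filter_id,
        (0..5).map(|i| ScalarExpr::input_ref(i).into()).collect(),
    );

    // Serialize the subgraph rooted at the projection into the same
    // {"nodes": [...], "edges": [...]} shape that the test expectations record;
    // the default annotators add entries such as "Num Columns" and "Row Type".
    let mut serializer = JsonSerializer::new_with_all_annotators();
    serializer.add_subgraph(&query_graph, project_id);
    println!("{}", serializer.serialize().expect("JSON serialization failed"));
}

The resulting string is the same node/edge JSON document that the data-driven tests store after each optimizer step.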
-------------------------------------------------------------------------------- /src/query_graph/optimizer/rules/common_aggregate_discovery.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeSet, HashMap}; 2 | 3 | use itertools::Itertools; 4 | 5 | use crate::{ 6 | query_graph::{ 7 | optimizer::{OptRuleType, Rule}, 8 | NodeId, QueryGraph, QueryNode, 9 | }, 10 | scalar_expr::{ 11 | rewrite::{dereference_extended_scalar_expr, dereference_scalar_expr}, 12 | AggregateExpr, ExtendedScalarExpr, ExtendedScalarExprRef, ScalarExpr, ScalarExprRef, 13 | ToExtendedExpr, ToScalarExpr, 14 | }, 15 | }; 16 | 17 | #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)] 18 | struct AggregateKey { 19 | input: NodeId, 20 | group_key: Vec, 21 | } 22 | 23 | struct AggregateValue { 24 | agg_node_id: NodeId, 25 | aggregates: Vec, 26 | } 27 | 28 | /// Rule that folds aggregates over the same input with the same grouping key into a single 29 | /// shared aggregate node. 30 | pub struct CommonAggregateDiscoveryRule {} 31 | 32 | impl Rule for CommonAggregateDiscoveryRule { 33 | fn rule_type(&self) -> OptRuleType { 34 | OptRuleType::RootOnly 35 | } 36 | 37 | fn apply(&self, query_graph: &mut QueryGraph, _: NodeId) -> Option> { 38 | // Collect and classify all the aggregate nodes in the query graph 39 | let mut classified_aggregates: HashMap> = HashMap::new(); 40 | for node_id in query_graph.nodes.keys().sorted() { 41 | if let QueryNode::Aggregate { 42 | group_key, 43 | aggregates, 44 | input, 45 | } = query_graph.node(*node_id) 46 | { 47 | let mut group_key = group_key 48 | .iter() 49 | .map(|i| ScalarExpr::InputRef { index: *i }.into()) 50 | .collect_vec(); 51 | let mut aggregates = aggregates 52 | .iter() 53 | .map(|agg| agg.to_extended_expr()) 54 | .collect_vec(); 55 | let mut normalized_input = *input; 56 | // Let's absorb projections so that we can still fold the following 57 | // two aggregations into a single one: 58 | // 59 | // Aggregate key[ref_0], Aggregates[max(ref_1)] 60 | // Project ref_0, ref_1 + ref_2 61 | // Shared node X 62 | // 63 | // Aggregate key[ref_0], Aggregates[max(ref_1)] 64 | // Project ref_0, ref_1 + ref_3 65 | // Shared node X 66 | // 67 | // The resulting aggregate will be: 68 | // 69 | // Aggregate key[ref_0], Aggregates[max(ref_1), max(ref_2] 70 | // Project ref_0, ref_1 + ref_2, ref_1 + ref_3 71 | // Shared node X 72 | while let QueryNode::Project { outputs, input } = query_graph.node(normalized_input) 73 | { 74 | let extended_outputs = 75 | outputs.iter().map(|e| e.to_extended_expr()).collect_vec(); 76 | for key in group_key.iter_mut() { 77 | *key = dereference_scalar_expr(key, &outputs); 78 | } 79 | for agg in aggregates.iter_mut() { 80 | *agg = dereference_extended_scalar_expr(agg, &extended_outputs); 81 | } 82 | normalized_input = *input; 83 | } 84 | 85 | classified_aggregates 86 | .entry(AggregateKey { 87 | input: normalized_input, 88 | group_key, 89 | }) 90 | .or_insert_with(|| Vec::new()) 91 | .push(AggregateValue { 92 | agg_node_id: *node_id, 93 | aggregates, 94 | }) 95 | } 96 | } 97 | let mut result: Option> = None; 98 | let mut it = classified_aggregates.iter().filter(|(_, v)| v.len() > 1); 99 | while let Some((key, values)) = it.next() { 100 | let new_group_key = (0..key.group_key.len()).collect::>(); 101 | let mut input_project = key.group_key.clone(); 102 | let all_aggregates = values 103 | .iter() 104 | .map(|v| v.aggregates.iter()) 105 | .flatten() 106 | .sorted() 107 | .dedup() 108 | .collect_vec(); 109 | 
let new_aggregates = all_aggregates 110 | .iter() 111 | .map(|a| match a.as_ref() { 112 | ExtendedScalarExpr::Aggregate { op, operands } => { 113 | let operands = operands 114 | .iter() 115 | .map(|o| { 116 | append_to_vector_if_not_present( 117 | &mut input_project, 118 | o.to_scalar_expr().unwrap(), 119 | ) 120 | }) 121 | .collect_vec(); 122 | AggregateExpr { 123 | op: op.clone(), 124 | operands, 125 | } 126 | .into() 127 | } 128 | _ => panic!(), 129 | }) 130 | .collect_vec(); 131 | let input = query_graph.project(key.input, input_project); 132 | let new_aggregate = query_graph.add_node(QueryNode::Aggregate { 133 | group_key: new_group_key, 134 | aggregates: new_aggregates, 135 | input, 136 | }); 137 | for value in values.iter() { 138 | let project = (0..key.group_key.len()) 139 | .chain(value.aggregates.iter().map(|a| { 140 | key.group_key.len() 141 | + all_aggregates 142 | .iter() 143 | .enumerate() 144 | .find_map(|(i, o)| if *a == **o { Some(i) } else { None }) 145 | // the aggregate must be present in the list of aggregates 146 | .unwrap() 147 | })) 148 | .map(|i| ScalarExpr::input_ref(i).into()) 149 | .collect_vec(); 150 | let new_project = query_graph.project(new_aggregate, project); 151 | result 152 | .get_or_insert_with(|| Vec::new()) 153 | .push((value.agg_node_id, new_project)); 154 | } 155 | } 156 | result 157 | } 158 | } 159 | 160 | fn append_to_vector_if_not_present(vec: &mut Vec, e: E) -> usize { 161 | if let Some(index) = vec 162 | .iter() 163 | .enumerate() 164 | .find_map(|(i, o)| if e == *o { Some(i) } else { None }) 165 | { 166 | index 167 | } else { 168 | vec.push(e); 169 | vec.len() - 1 170 | } 171 | } 172 | -------------------------------------------------------------------------------- /src/query_graph/explain.rs: -------------------------------------------------------------------------------- 1 | use crate::query_graph::visitor::*; 2 | use crate::query_graph::*; 3 | use crate::scalar_expr::ScalarExpr; 4 | use crate::visitor_utils::PreOrderVisitationResult; 5 | 6 | use super::properties::default_annotators; 7 | 8 | /// Utility for explaining a query graph. 9 | pub struct Explainer<'a> { 10 | pub(super) query_graph: &'a QueryGraph, 11 | pub(super) annotators: Vec<&'a dyn Fn(&QueryGraph, NodeId) -> Option>, 12 | pub(super) leaves: HashSet, 13 | pub(super) entry_point: NodeId, 14 | } 15 | 16 | impl<'a> Explainer<'a> { 17 | pub fn new(query_graph: &'a QueryGraph) -> Self { 18 | Self { 19 | query_graph, 20 | leaves: HashSet::new(), 21 | annotators: Vec::new(), 22 | entry_point: QueryGraph::ROOT_NODE_ID, 23 | } 24 | } 25 | 26 | pub fn with_all_annotators(self) -> Self { 27 | Self { 28 | query_graph: self.query_graph, 29 | leaves: self.leaves, 30 | annotators: default_annotators(), 31 | entry_point: self.entry_point, 32 | } 33 | } 34 | 35 | pub fn with_annotators( 36 | self, 37 | annotators: Vec<&'a dyn Fn(&QueryGraph, NodeId) -> Option>, 38 | ) -> Self { 39 | Self { 40 | query_graph: self.query_graph, 41 | leaves: self.leaves, 42 | annotators, 43 | entry_point: self.entry_point, 44 | } 45 | } 46 | 47 | /// Treat the given nodes as leaves in the explain plan. 48 | pub fn with_leaves(self, leaves: HashSet) -> Self { 49 | Self { 50 | query_graph: self.query_graph, 51 | leaves, 52 | annotators: self.annotators, 53 | entry_point: self.entry_point, 54 | } 55 | } 56 | 57 | /// Override the entry point for the explain plan. 
58 | pub fn with_entry_point(self, entry_point: NodeId) -> Self { 59 | Self { 60 | query_graph: self.query_graph, 61 | leaves: self.leaves, 62 | annotators: self.annotators, 63 | entry_point, 64 | } 65 | } 66 | 67 | /// Generate the explain plan. 68 | pub fn explain(&self) -> String { 69 | let mut explain = ExplainVisitor::new(self); 70 | self.query_graph 71 | .visit_subgraph(&mut explain, self.entry_point); 72 | let subquery_roots = self.query_graph.subquery_roots(); 73 | for subquery_root in subquery_roots { 74 | explain.result += "\n"; 75 | self.query_graph.visit_subgraph(&mut explain, subquery_root); 76 | } 77 | explain.result 78 | } 79 | } 80 | 81 | /// Explain functions. 82 | impl QueryGraph { 83 | /// Returns a stringified version of the query graph. 84 | pub fn explain(&self) -> String { 85 | Explainer::new(&self).explain() 86 | } 87 | 88 | // Explains the query graph annotated with all available properties. 89 | pub fn fully_annotated_explain(&self) -> String { 90 | Explainer::new(self).with_all_annotators().explain() 91 | } 92 | } 93 | 94 | struct ExplainVisitor<'a> { 95 | indentation: usize, 96 | visited_nodes: HashSet, 97 | result: String, 98 | options: &'a Explainer<'a>, 99 | } 100 | 101 | impl<'a> ExplainVisitor<'a> { 102 | fn new(options: &'a Explainer) -> Self { 103 | Self { 104 | indentation: 0, 105 | visited_nodes: HashSet::new(), 106 | result: String::new(), 107 | options, 108 | } 109 | } 110 | } 111 | 112 | impl<'a> QueryGraphPrePostVisitor for ExplainVisitor<'a> { 113 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 114 | let line_prefix = (0..2 * self.indentation).map(|_| ' ').collect::(); 115 | self.indentation += 1; 116 | if !self.visited_nodes.insert(node_id) { 117 | self.result += &format!("{}Recurring node {}\n", line_prefix, node_id); 118 | return PreOrderVisitationResult::DoNotVisitInputs; 119 | } 120 | let prefix = format!("{}[{}] ", line_prefix, node_id); 121 | let node = match query_graph.node(node_id) { 122 | QueryNode::QueryRoot { .. } => { 123 | format!("{}QueryRoot\n", prefix) 124 | } 125 | QueryNode::Project { outputs, .. } => { 126 | format!("{}Project [{}]\n", prefix, explain_scalar_expr_vec(outputs)) 127 | } 128 | QueryNode::Filter { conditions, .. } => { 129 | format!( 130 | "{}Filter [{}]\n", 131 | prefix, 132 | explain_scalar_expr_vec(conditions), 133 | ) 134 | } 135 | QueryNode::TableScan { table_id, .. } => { 136 | format!("{}TableScan id: {}\n", prefix, table_id) 137 | } 138 | QueryNode::Join { 139 | join_type, 140 | conditions, 141 | .. 142 | } => { 143 | format!( 144 | "{}{} Join [{}]\n", 145 | prefix, 146 | join_type, 147 | explain_scalar_expr_vec(conditions) 148 | ) 149 | } 150 | QueryNode::Aggregate { 151 | group_key, 152 | aggregates, 153 | .. 154 | } => format!( 155 | "{}Aggregate key: [{}], aggregates: [{}]\n", 156 | prefix, 157 | group_key 158 | .iter() 159 | .map(|e| format!("{}", ScalarExpr::input_ref(*e))) 160 | .collect::>() 161 | .join(", "), 162 | aggregates 163 | .iter() 164 | .map(|e| format!("{}", e)) 165 | .collect::>() 166 | .join(", "), 167 | ), 168 | QueryNode::Union { .. } => format!("{}Union\n", prefix), 169 | QueryNode::SubqueryRoot { .. } => format!("{}SubqueryRoot\n", prefix), 170 | QueryNode::Apply { 171 | correlation, 172 | apply_type, 173 | .. 
174 | } => { 175 | format!( 176 | "{}{} Apply parameters: [{}]\n", 177 | prefix, 178 | apply_type, 179 | explain_scalar_expr_vec(&correlation.parameters), 180 | ) 181 | } 182 | }; 183 | self.result += &node; 184 | 185 | for annotator in self.options.annotators.iter() { 186 | if let Some(annotation) = (annotator)(query_graph, node_id) { 187 | self.result += format!("{} - {}\n", line_prefix, annotation).as_str(); 188 | } 189 | } 190 | 191 | if self.options.leaves.contains(&node_id) { 192 | PreOrderVisitationResult::DoNotVisitInputs 193 | } else { 194 | PreOrderVisitationResult::VisitInputs 195 | } 196 | } 197 | 198 | fn visit_post(&mut self, _: &QueryGraph, _: NodeId) { 199 | self.indentation -= 1; 200 | } 201 | } 202 | 203 | pub(crate) fn explain_scalar_expr_vec(vec: &Vec) -> String { 204 | vec.iter() 205 | .map(|e| format!("{}", e)) 206 | .collect::>() 207 | .join(", ") 208 | } 209 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "autocfg" 7 | version = "1.1.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 10 | 11 | [[package]] 12 | name = "datadriven" 13 | version = "0.7.0" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "df18f0e7700f33562d92872dae54641b07bba9dc6a68faa40bf77c6a19ad6f97" 16 | dependencies = [ 17 | "futures", 18 | "thiserror", 19 | ] 20 | 21 | [[package]] 22 | name = "either" 23 | version = "1.8.1" 24 | source = "registry+https://github.com/rust-lang/crates.io-index" 25 | checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" 26 | 27 | [[package]] 28 | name = "futures" 29 | version = "0.3.28" 30 | source = "registry+https://github.com/rust-lang/crates.io-index" 31 | checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" 32 | dependencies = [ 33 | "futures-channel", 34 | "futures-core", 35 | "futures-executor", 36 | "futures-io", 37 | "futures-sink", 38 | "futures-task", 39 | "futures-util", 40 | ] 41 | 42 | [[package]] 43 | name = "futures-channel" 44 | version = "0.3.28" 45 | source = "registry+https://github.com/rust-lang/crates.io-index" 46 | checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" 47 | dependencies = [ 48 | "futures-core", 49 | "futures-sink", 50 | ] 51 | 52 | [[package]] 53 | name = "futures-core" 54 | version = "0.3.28" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" 57 | 58 | [[package]] 59 | name = "futures-executor" 60 | version = "0.3.28" 61 | source = "registry+https://github.com/rust-lang/crates.io-index" 62 | checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" 63 | dependencies = [ 64 | "futures-core", 65 | "futures-task", 66 | "futures-util", 67 | ] 68 | 69 | [[package]] 70 | name = "futures-io" 71 | version = "0.3.28" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" 74 | 75 | [[package]] 76 | name = "futures-macro" 77 | version = "0.3.28" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = 
"89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" 80 | dependencies = [ 81 | "proc-macro2", 82 | "quote", 83 | "syn", 84 | ] 85 | 86 | [[package]] 87 | name = "futures-sink" 88 | version = "0.3.28" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" 91 | 92 | [[package]] 93 | name = "futures-task" 94 | version = "0.3.28" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" 97 | 98 | [[package]] 99 | name = "futures-util" 100 | version = "0.3.28" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" 103 | dependencies = [ 104 | "futures-channel", 105 | "futures-core", 106 | "futures-io", 107 | "futures-macro", 108 | "futures-sink", 109 | "futures-task", 110 | "memchr", 111 | "pin-project-lite", 112 | "pin-utils", 113 | "slab", 114 | ] 115 | 116 | [[package]] 117 | name = "itertools" 118 | version = "0.11.0" 119 | source = "registry+https://github.com/rust-lang/crates.io-index" 120 | checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" 121 | dependencies = [ 122 | "either", 123 | ] 124 | 125 | [[package]] 126 | name = "itoa" 127 | version = "1.0.9" 128 | source = "registry+https://github.com/rust-lang/crates.io-index" 129 | checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" 130 | 131 | [[package]] 132 | name = "lazy_static" 133 | version = "1.4.0" 134 | source = "registry+https://github.com/rust-lang/crates.io-index" 135 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 136 | 137 | [[package]] 138 | name = "memchr" 139 | version = "2.5.0" 140 | source = "registry+https://github.com/rust-lang/crates.io-index" 141 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 142 | 143 | [[package]] 144 | name = "pin-project-lite" 145 | version = "0.2.10" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "4c40d25201921e5ff0c862a505c6557ea88568a4e3ace775ab55e93f2f4f9d57" 148 | 149 | [[package]] 150 | name = "pin-utils" 151 | version = "0.1.0" 152 | source = "registry+https://github.com/rust-lang/crates.io-index" 153 | checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" 154 | 155 | [[package]] 156 | name = "proc-macro2" 157 | version = "1.0.66" 158 | source = "registry+https://github.com/rust-lang/crates.io-index" 159 | checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" 160 | dependencies = [ 161 | "unicode-ident", 162 | ] 163 | 164 | [[package]] 165 | name = "quote" 166 | version = "1.0.31" 167 | source = "registry+https://github.com/rust-lang/crates.io-index" 168 | checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" 169 | dependencies = [ 170 | "proc-macro2", 171 | ] 172 | 173 | [[package]] 174 | name = "rust-sql" 175 | version = "0.1.0" 176 | dependencies = [ 177 | "datadriven", 178 | "itertools", 179 | "lazy_static", 180 | "serde", 181 | "serde_derive", 182 | "serde_json", 183 | ] 184 | 185 | [[package]] 186 | name = "ryu" 187 | version = "1.0.15" 188 | source = "registry+https://github.com/rust-lang/crates.io-index" 189 | checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" 190 | 191 | [[package]] 192 | name = "serde" 193 | version = 
"1.0.171" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" 196 | 197 | [[package]] 198 | name = "serde_derive" 199 | version = "1.0.171" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" 202 | dependencies = [ 203 | "proc-macro2", 204 | "quote", 205 | "syn", 206 | ] 207 | 208 | [[package]] 209 | name = "serde_json" 210 | version = "1.0.103" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "d03b412469450d4404fe8499a268edd7f8b79fecb074b0d812ad64ca21f4031b" 213 | dependencies = [ 214 | "itoa", 215 | "ryu", 216 | "serde", 217 | ] 218 | 219 | [[package]] 220 | name = "slab" 221 | version = "0.4.8" 222 | source = "registry+https://github.com/rust-lang/crates.io-index" 223 | checksum = "6528351c9bc8ab22353f9d776db39a20288e8d6c37ef8cfe3317cf875eecfc2d" 224 | dependencies = [ 225 | "autocfg", 226 | ] 227 | 228 | [[package]] 229 | name = "syn" 230 | version = "2.0.26" 231 | source = "registry+https://github.com/rust-lang/crates.io-index" 232 | checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" 233 | dependencies = [ 234 | "proc-macro2", 235 | "quote", 236 | "unicode-ident", 237 | ] 238 | 239 | [[package]] 240 | name = "thiserror" 241 | version = "1.0.43" 242 | source = "registry+https://github.com/rust-lang/crates.io-index" 243 | checksum = "a35fc5b8971143ca348fa6df4f024d4d55264f3468c71ad1c2f365b0a4d58c42" 244 | dependencies = [ 245 | "thiserror-impl", 246 | ] 247 | 248 | [[package]] 249 | name = "thiserror-impl" 250 | version = "1.0.43" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "463fe12d7993d3b327787537ce8dd4dfa058de32fc2b195ef3cde03dc4771e8f" 253 | dependencies = [ 254 | "proc-macro2", 255 | "quote", 256 | "syn", 257 | ] 258 | 259 | [[package]] 260 | name = "unicode-ident" 261 | version = "1.0.11" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" 264 | -------------------------------------------------------------------------------- /src/query_graph/properties/correlated_input_refs.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | any::TypeId, 3 | collections::{BTreeSet, HashMap}, 4 | rc::Rc, 5 | }; 6 | 7 | use itertools::Itertools; 8 | 9 | use crate::{ 10 | query_graph::{visitor::QueryGraphPrePostVisitor, NodeId, QueryGraph, QueryNode}, 11 | scalar_expr::{visitor::visit_expr_pre, ScalarExpr}, 12 | visitor_utils::PreOrderVisitationResult, 13 | }; 14 | 15 | struct CorrelatedInputRefsTag; 16 | 17 | /// Returns a set with the correlated input refs the node contains, if any. 
18 | pub fn node_correlated_input_refs( 19 | query_graph: &QueryGraph, 20 | node_id: NodeId, 21 | ) -> Rc>> { 22 | let type_id = TypeId::of::(); 23 | if let Some(cached) = query_graph 24 | .property_cache 25 | .borrow_mut() 26 | .single_node_properties(node_id) 27 | .get(&type_id) 28 | { 29 | return cached 30 | .downcast_ref::>>>() 31 | .unwrap() 32 | .clone(); 33 | } 34 | let mut correlated_cols = HashMap::new(); 35 | let query_node = query_graph.node(node_id); 36 | query_node.visit_scalar_expr(&mut |expr| { 37 | visit_expr_pre(expr, &mut |curr_expr| { 38 | match curr_expr.as_ref() { 39 | ScalarExpr::CorrelatedInputRef { 40 | context_offset, 41 | index, 42 | .. 43 | } => { 44 | correlated_cols 45 | .entry(*context_offset) 46 | .or_insert_with(|| BTreeSet::new()) 47 | .insert(*index); 48 | } 49 | ScalarExpr::ExistsSubquery { subquery } 50 | | ScalarExpr::ScalarSubquery { subquery } 51 | | ScalarExpr::ScalarSubqueryCmp { subquery, .. } => { 52 | let subquery_correlated_input_refs = 53 | subgraph_correlated_input_refs(query_graph, subquery.root); 54 | if subquery.correlation.is_some() { 55 | let subquery_external_correlated_input_refs = 56 | subquery_correlated_input_refs 57 | .iter() 58 | .filter(|(offset, _)| **offset > 0) 59 | .map(|(offset, columns)| (offset - 1, columns.clone())) 60 | .collect::>>(); 61 | merge_correlated_maps( 62 | subquery_external_correlated_input_refs.iter(), 63 | &mut correlated_cols, 64 | ); 65 | } else { 66 | merge_correlated_maps( 67 | subquery_correlated_input_refs.iter(), 68 | &mut correlated_cols, 69 | ); 70 | } 71 | } 72 | _ => (), 73 | } 74 | PreOrderVisitationResult::VisitInputs 75 | }); 76 | }); 77 | 78 | // Store the property in the cache 79 | let correlated_cols = Rc::new(correlated_cols); 80 | query_graph 81 | .property_cache 82 | .borrow_mut() 83 | .single_node_properties(node_id) 84 | .insert(type_id, Box::new(correlated_cols.clone())); 85 | correlated_cols 86 | } 87 | 88 | /// Returns a set with the correlated input refs in the given subplan that escape 89 | /// the context of the subplan. 
90 | pub fn subgraph_correlated_input_refs( 91 | query_graph: &QueryGraph, 92 | node_id: NodeId, 93 | ) -> Rc>> { 94 | SubgraphCorrelatedInputRefs::subgraph_correlated_input_refs(query_graph, node_id) 95 | } 96 | 97 | pub fn subgraph_correlated_input_refs_annotator( 98 | query_graph: &QueryGraph, 99 | node_id: NodeId, 100 | ) -> Option { 101 | let correlated_cols = subgraph_correlated_input_refs(query_graph, node_id); 102 | let correlated_cols = correlated_cols 103 | .iter() 104 | .sorted() 105 | .map(|(offset, columns)| { 106 | columns 107 | .iter() 108 | .map(|column| format!("ctx_{}.ref_{}", *offset, column)) 109 | }) 110 | .flatten() 111 | .join(", "); 112 | if correlated_cols.is_empty() { 113 | None 114 | } else { 115 | Some(format!("Correlated References: {}", correlated_cols)) 116 | } 117 | } 118 | 119 | struct SubgraphCorrelatedInputRefs {} 120 | 121 | impl SubgraphCorrelatedInputRefs { 122 | fn subgraph_correlated_input_refs( 123 | query_graph: &QueryGraph, 124 | node_id: NodeId, 125 | ) -> Rc>> { 126 | let mut visitor = SubgraphCorrelatedInputRefs {}; 127 | query_graph.visit_subgraph(&mut visitor, node_id); 128 | visitor.subgraph_correlated_input_refs_unchecked(query_graph, node_id) 129 | } 130 | 131 | fn subgraph_correlated_input_refs_unchecked( 132 | &self, 133 | query_graph: &QueryGraph, 134 | node_id: NodeId, 135 | ) -> Rc>> { 136 | query_graph 137 | .property_cache 138 | .borrow_mut() 139 | .node_bottom_up_properties(node_id) 140 | .get(&Self::metadata_type_id()) 141 | .unwrap() 142 | .downcast_ref::>>>() 143 | .unwrap() 144 | .clone() 145 | } 146 | 147 | fn metadata_type_id() -> TypeId { 148 | TypeId::of::() 149 | } 150 | 151 | fn compute_property_for_node( 152 | &self, 153 | query_graph: &QueryGraph, 154 | node_id: NodeId, 155 | ) -> Rc>> { 156 | // The correlated input refs in the node itself... 157 | let mut correlated_cols: HashMap> = 158 | node_correlated_input_refs(query_graph, node_id) 159 | .as_ref() 160 | .clone(); 161 | // ... and the ones under its child subgraphs, ... 162 | let query_node = query_graph.node(node_id); 163 | for input in 0..query_node.num_inputs() { 164 | let input_correlated_cols = self 165 | .subgraph_correlated_input_refs_unchecked(query_graph, query_node.get_input(input)); 166 | merge_correlated_maps(input_correlated_cols.iter(), &mut correlated_cols); 167 | } 168 | //... but remove ones in the correlation scope the node defines. 169 | if let QueryNode::Apply { .. 
} = &query_node { 170 | correlated_cols = correlated_cols 171 | .into_iter() 172 | .filter(|(offset, _)| *offset > 0) 173 | .map(|(offset, columns)| (offset - 1, columns)) 174 | .collect(); 175 | } 176 | Rc::new(correlated_cols) 177 | } 178 | } 179 | 180 | impl QueryGraphPrePostVisitor for SubgraphCorrelatedInputRefs { 181 | fn visit_pre(&mut self, query_graph: &QueryGraph, node_id: NodeId) -> PreOrderVisitationResult { 182 | if query_graph 183 | .property_cache 184 | .borrow_mut() 185 | .node_bottom_up_properties(node_id) 186 | .contains_key(&Self::metadata_type_id()) 187 | { 188 | PreOrderVisitationResult::DoNotVisitInputs 189 | } else { 190 | PreOrderVisitationResult::VisitInputs 191 | } 192 | } 193 | 194 | fn visit_post(&mut self, query_graph: &QueryGraph, node_id: NodeId) { 195 | if !query_graph 196 | .property_cache 197 | .borrow_mut() 198 | .node_bottom_up_properties(node_id) 199 | .contains_key(&Self::metadata_type_id()) 200 | { 201 | let correlated_input_refs = self.compute_property_for_node(query_graph, node_id); 202 | query_graph 203 | .property_cache 204 | .borrow_mut() 205 | .node_bottom_up_properties(node_id) 206 | .insert(Self::metadata_type_id(), Box::new(correlated_input_refs)); 207 | } 208 | } 209 | } 210 | 211 | fn merge_correlated_maps<'a, I>(src: I, dst: &mut HashMap>) 212 | where 213 | I: Iterator)>, 214 | { 215 | for (context_offset, columns) in src { 216 | dst.entry(*context_offset) 217 | .or_insert_with(|| BTreeSet::new()) 218 | .extend(columns.iter()); 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /tests/testdata/explain/aggregate_project_transpose.test: -------------------------------------------------------------------------------- 1 | run 2 | aggregate_project_transpose_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 5 7 | - Row Type: string, string, string, string, string 8 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 9 | [3] Aggregate key: [ref_0, ref_1, ref_2], aggregates: [min(ref_4), max(ref_3)] 10 | - Num Columns: 5 11 | - Row Type: string, string, string, string, string 12 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 13 | [2] Project [ref_4, ref_3, ref_2, ref_1, ref_0] 14 | - Num Columns: 5 15 | - Row Type: string, string, string, string, string 16 | [1] TableScan id: 1 17 | - Num Columns: 5 18 | - Row Type: string, string, string, string, string 19 | 20 | 21 | Optimized: 22 | [0] QueryRoot 23 | - Num Columns: 5 24 | - Row Type: string, string, string, string, string 25 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 26 | [7] Project [ref_2, ref_1, ref_0, ref_3, ref_4] 27 | - Num Columns: 5 28 | - Row Type: string, string, string, string, string 29 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 30 | [6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)] 31 | - Num Columns: 5 32 | - Row Type: string, string, string, string, string 33 | - Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1] 34 | [1] TableScan id: 1 35 | - Num Columns: 5 36 | - Row Type: string, string, string, string, string 37 | 38 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"3","label":"[3] Aggregate key: [ref_0, ref_1, ref_2], aggregates: [min(ref_4), max(ref_3)]","annotations":["Num Columns: 5","Row Type: string, 
string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"2","label":"[2] Project [ref_4, ref_3, ref_2, ref_1, ref_0]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"}]} 39 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"3","label":"[3] Aggregate key: [ref_0, ref_1, ref_2], aggregates: [min(ref_4), max(ref_3)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"2","label":"[2] Project [ref_4, ref_3, ref_2, ref_1, ref_0]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"4","label":"TopProjectionRule"}]} 40 | step AggregateProjectTransposeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"3","label":"[3] Aggregate key: [ref_0, ref_1, ref_2], aggregates: [min(ref_4), max(ref_3)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"2","label":"[2] Project [ref_4, ref_3, ref_2, ref_1, ref_0]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"7","label":"[7] Project [ref_2, ref_1, ref_0, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"6","label":"[6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"7","to":"6","label":"input 
0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"3","to":"7","label":"AggregateProjectTransposeRule"}]} 41 | step ProjectMergeRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"7","label":"[7] Project [ref_2, ref_1, ref_0, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"6","label":"[6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"7","label":"input 0"},{"from":"7","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"4","to":"7","label":"ProjectMergeRule"}]} 42 | step RemovePassthroughProjectRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"7","label":"[7] Project [ref_2, ref_1, ref_0, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"6","label":"[6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"5","label":"[5] Project [ref_0, ref_1, ref_2, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"7","label":"input 0"},{"from":"7","to":"6","label":"input 0"},{"from":"6","to":"5","label":"input 0"},{"from":"5","to":"1","label":"input 0"},{"from":"5","to":"1","label":"RemovePassthroughProjectRule"}]} 43 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"7","label":"[7] Project [ref_2, ref_1, ref_0, ref_3, ref_4]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"6","label":"[6] Aggregate key: [ref_2, ref_3, ref_4], aggregates: [min(ref_0), max(ref_1)]","annotations":["Num Columns: 5","Row Type: string, string, string, string, string","Keys: [key: [ref_0, ref_1, ref_2], lower_bound: 0, upper_bound: 1]"]},{"id":"1","label":"[1] TableScan id: 
1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"7","label":"input 0"},{"from":"7","to":"6","label":"input 0"},{"from":"6","to":"1","label":"input 0"}]} 44 | ---- 45 | ---- 46 | -------------------------------------------------------------------------------- /tests/testdata/explain/expression_reduction.test: -------------------------------------------------------------------------------- 1 | run 2 | expression_reduction_1 3 | ---- 4 | ---- 5 | [0] QueryRoot 6 | - Num Columns: 10 7 | - Row Type: string, string, string, string, string, string, string, string, string, string 8 | - Pulled Up Predicates: lt(NULL, ref_1) 9 | [3] Filter [lt(NULL, ref_1)] 10 | - Num Columns: 10 11 | - Row Type: string, string, string, string, string, string, string, string, string, string 12 | - Pulled Up Predicates: lt(NULL, ref_1) 13 | [2] Left Outer Join [eq(NULL, ref_5)] 14 | - Num Columns: 10 15 | - Row Type: string, string, string, string, string, string, string, string, string, string 16 | [1] TableScan id: 1 17 | - Num Columns: 5 18 | - Row Type: string, string, string, string, string 19 | Recurring node 1 20 | 21 | 22 | Optimized: 23 | [0] QueryRoot 24 | - Num Columns: 10 25 | - Row Type: string, string, string, string, string, string, string, string, string, string 26 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 27 | [4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9] 28 | - Num Columns: 10 29 | - Row Type: string, string, string, string, string, string, string, string, string, string 30 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 31 | [5] Filter [NULL] 32 | - Num Columns: 10 33 | - Row Type: string, string, string, string, string, string, string, string, string, string 34 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 35 | [7] Inner Join [NULL] 36 | - Num Columns: 10 37 | - Row Type: string, string, string, string, string, string, string, string, string, string 38 | - Keys: [key: [], lower_bound: 0, upper_bound: 0] 39 | [1] TableScan id: 1 40 | - Num Columns: 5 41 | - Row Type: string, string, string, string, string 42 | Recurring node 1 43 | 44 | initial {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"3","label":"[3] Filter [lt(NULL, ref_1)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"2","label":"[2] Left Outer Join [eq(NULL, ref_5)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"}]} 45 | step TopProjectionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"3","label":"[3] Filter [lt(NULL, ref_1)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, 
ref_1)"]},{"id":"2","label":"[2] Left Outer Join [eq(NULL, ref_5)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]}],"edges":[{"from":"0","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"4","label":"TopProjectionRule"}]} 46 | step ExpressionReductionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"3","label":"[3] Filter [lt(NULL, ref_1)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Pulled Up Predicates: lt(NULL, ref_1)"]},{"id":"2","label":"[2] Left Outer Join [eq(NULL, ref_5)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"5","label":"[5] Filter [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"3","label":"input 0"},{"from":"3","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"5","to":"2","label":"input 0"},{"from":"3","to":"5","label":"ExpressionReductionRule"}]} 47 | step ExpressionReductionRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"5","label":"[5] Filter [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"2","label":"[2] Left Outer Join [eq(NULL, ref_5)]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"6","label":"[6] Left Outer Join [NULL]","annotations":["Num Columns: 10","Row 
Type: string, string, string, string, string, string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"5","label":"input 0"},{"from":"5","to":"2","label":"input 0"},{"from":"2","to":"1","label":"input 0"},{"from":"2","to":"1","label":"input 1"},{"from":"6","to":"1","label":"input 0"},{"from":"6","to":"1","label":"input 1"},{"from":"2","to":"6","label":"ExpressionReductionRule"}]} 48 | step OuterToInnerJoinRule {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"5","label":"[5] Filter [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"6","label":"[6] Left Outer Join [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]},{"id":"7","label":"[7] Inner Join [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"5","label":"input 0"},{"from":"5","to":"6","label":"input 0"},{"from":"6","to":"1","label":"input 0"},{"from":"6","to":"1","label":"input 1"},{"from":"7","to":"1","label":"input 0"},{"from":"7","to":"1","label":"input 1"},{"from":"6","to":"7","label":"OuterToInnerJoinRule"}]} 49 | final {"nodes":[{"id":"0","label":"[0] QueryRoot","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"4","label":"[4] Project [ref_0, ref_1, ref_2, ref_3, ref_4, ref_5, ref_6, ref_7, ref_8, ref_9]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"5","label":"[5] Filter [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"7","label":"[7] Inner Join [NULL]","annotations":["Num Columns: 10","Row Type: string, string, string, string, string, string, string, string, string, string","Keys: [key: [], lower_bound: 0, upper_bound: 0]"]},{"id":"1","label":"[1] TableScan id: 1","annotations":["Num Columns: 5","Row Type: string, string, string, string, string"]}],"edges":[{"from":"0","to":"4","label":"input 0"},{"from":"4","to":"5","label":"input 0"},{"from":"5","to":"7","label":"input 0"},{"from":"7","to":"1","label":"input 0"},{"from":"7","to":"1","label":"input 1"}]} 50 | ---- 51 | ---- 52 | --------------------------------------------------------------------------------
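Note on the correlated-references property in src/query_graph/properties/correlated_input_refs.rs above: the scoping step in compute_property_for_node is easy to miss when reading the flattened listing, so here is a minimal, self-contained sketch of the same idea. It assumes a simplified HashMap<usize, BTreeSet<usize>> map type and hypothetical helper names (merge, combine_for_apply); it only illustrates how child maps are merged and how an Apply node drops context offset 0 and shifts the remaining offsets down by one.

use std::collections::{BTreeSet, HashMap};

// Correlated references grouped by context offset: 0 is the innermost
// enclosing Apply, 1 the next one out, and so on.
type CorrelatedRefs = HashMap<usize, BTreeSet<usize>>;

// Merge `src` into `dst`, in the spirit of `merge_correlated_maps` above.
fn merge(src: &CorrelatedRefs, dst: &mut CorrelatedRefs) {
    for (offset, columns) in src {
        dst.entry(*offset).or_default().extend(columns.iter().copied());
    }
}

// Combine a node's own correlated references with those of its inputs, then
// peel off the scope an Apply node defines (offset 0) and shift the remaining
// offsets down by one, mirroring the filter/map step in the property code.
fn combine_for_apply(own: &CorrelatedRefs, inputs: &[CorrelatedRefs]) -> CorrelatedRefs {
    let mut all = own.clone();
    for input in inputs {
        merge(input, &mut all);
    }
    all.into_iter()
        .filter(|(offset, _)| *offset > 0)
        .map(|(offset, columns)| (offset - 1, columns))
        .collect()
}

fn main() {
    // A child subgraph referencing columns 1 and 3 of the nearest Apply (offset 0)
    // and column 2 of the next enclosing Apply (offset 1).
    let child: CorrelatedRefs = [(0, BTreeSet::from([1, 3])), (1, BTreeSet::from([2]))]
        .into_iter()
        .collect();
    let result = combine_for_apply(&CorrelatedRefs::new(), &[child]);
    // Offset 0 is resolved by this Apply; the offset-1 reference becomes offset 0.
    let expected: CorrelatedRefs = [(0, BTreeSet::from([2]))].into_iter().collect();
    assert_eq!(result, expected);
}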