├── optd-persistent ├── src │ ├── cost_model │ │ ├── catalog │ │ │ ├── mod.rs │ │ │ └── mock_catalog.rs │ │ ├── mod.rs │ │ └── interface.rs │ ├── memo │ │ ├── mod.rs │ │ └── expression.rs │ ├── db │ │ └── init.db │ ├── bin │ │ └── migrate.rs │ ├── entities │ │ ├── database_metadata.rs │ │ ├── logical_property.rs │ │ ├── mod.rs │ │ ├── physical_property.rs │ │ ├── predicate_children.rs │ │ ├── trigger.rs │ │ ├── namespace_metadata.rs │ │ ├── event.rs │ │ ├── versioned_statistic.rs │ │ ├── statistic_to_attribute_junction.rs │ │ ├── logical_children.rs │ │ ├── index_metadata.rs │ │ ├── attribute_constraint_junction.rs │ │ ├── physical_children.rs │ │ ├── attribute_foreign_constraint_junction.rs │ │ ├── logical_expression.rs │ │ ├── predicate_logical_expression_junction.rs │ │ ├── predicate_physical_expression_junction.rs │ │ ├── physical_expression_to_statistic_junction.rs │ │ ├── prelude.rs │ │ ├── plan_cost.rs │ │ ├── table_metadata.rs │ │ ├── predicate.rs │ │ ├── group_winner.rs │ │ ├── constraint_metadata.rs │ │ ├── attribute.rs │ │ ├── cascades_group.rs │ │ ├── statistic.rs │ │ └── physical_expression.rs │ ├── migrator │ │ ├── cost_model │ │ │ ├── mod.rs │ │ │ ├── m20241029_000001_event.rs │ │ │ ├── m20241029_000001_versioned_statistic.rs │ │ │ ├── m20241029_000001_statistic.rs │ │ │ ├── m20241029_000001_statistic_to_attribute_junction.rs │ │ │ ├── m20241029_000001_physical_expression_to_statistic_junction.rs │ │ │ └── m20241029_000001_plan_cost.rs │ │ ├── catalog │ │ │ ├── m20241029_000001_database_metadata.rs │ │ │ ├── mod.rs │ │ │ ├── m20241029_000001_table_metadata.rs │ │ │ ├── m20241029_000001_namespace_metadata.rs │ │ │ ├── m20241029_000001_attribute.rs │ │ │ ├── m20241029_000001_trigger.rs │ │ │ ├── m20241029_000001_index_metadata.rs │ │ │ ├── m20241029_000001_constraint_metadata.rs │ │ │ ├── m20241029_000001_attribute_constraint_junction.rs │ │ │ └── m20241029_000001_attribute_foreign_constraint_junction.rs │ │ ├── memo │ │ │ ├── 
m20241029_000001_predicate.rs │ │ │ ├── mod.rs │ │ │ ├── m20241029_000001_logical_property.rs │ │ │ ├── m20241029_000001_physical_property.rs │ │ │ ├── m20241029_000001_predicate_children.rs │ │ │ ├── m20241029_000001_logical_children.rs │ │ │ ├── m20241029_000001_predicate_logical_expression_junction.rs │ │ │ ├── m20241029_000001_physical_children.rs │ │ │ ├── m20241029_000001_predicate_physical_expression_junction.rs │ │ │ ├── m20241029_000001_logical_expression.rs │ │ │ ├── m20241029_000001_physical_expression.rs │ │ │ ├── m20241029_000001_group_winner.rs │ │ │ └── m20241029_000001_cascades_group.rs │ │ └── mod.rs │ ├── lib.rs │ └── main.rs ├── Cargo.toml └── README.md ├── Cargo.toml ├── optd-cost-model ├── src │ ├── cost │ │ ├── mod.rs │ │ ├── filter │ │ │ ├── mod.rs │ │ │ ├── log_op.rs │ │ │ ├── constant.rs │ │ │ └── in_list.rs │ │ ├── limit.rs │ │ └── join │ │ │ ├── nested_loop_join.rs │ │ │ ├── hash_join.rs │ │ │ └── mod.rs │ ├── stats │ │ ├── utilities │ │ │ ├── mod.rs │ │ │ └── simple_map.rs │ │ └── arith_encoder.rs │ ├── common │ │ ├── mod.rs │ │ ├── predicates │ │ │ ├── mod.rs │ │ │ ├── sort_order_pred.rs │ │ │ ├── func_pred.rs │ │ │ ├── data_type_pred.rs │ │ │ ├── list_pred.rs │ │ │ ├── attr_index_pred.rs │ │ │ ├── in_list_pred.rs │ │ │ ├── cast_pred.rs │ │ │ ├── un_op_pred.rs │ │ │ ├── like_pred.rs │ │ │ ├── bin_op_pred.rs │ │ │ └── log_op_pred.rs │ │ ├── properties │ │ │ ├── schema.rs │ │ │ └── mod.rs │ │ ├── types.rs │ │ └── nodes.rs │ ├── storage │ │ ├── mod.rs │ │ └── mock.rs │ ├── memo_ext.rs │ └── utils.rs └── Cargo.toml ├── .gitignore ├── .github ├── dependabot.yml └── workflows │ ├── check.yml │ └── test.yml └── LICENSE /optd-persistent/src/cost_model/catalog/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod mock_catalog; 2 | -------------------------------------------------------------------------------- /Cargo.toml: 
-------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["optd-cost-model", "optd-persistent"] 3 | resolver = "2" 4 | -------------------------------------------------------------------------------- /optd-persistent/src/memo/mod.rs: -------------------------------------------------------------------------------- 1 | mod expression; 2 | 3 | pub mod interface; 4 | pub mod orm; 5 | -------------------------------------------------------------------------------- /optd-persistent/src/cost_model/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod catalog; 2 | 3 | pub mod interface; 4 | pub mod orm; 5 | -------------------------------------------------------------------------------- /optd-cost-model/src/cost/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod agg; 2 | pub mod filter; 3 | pub mod join; 4 | pub mod limit; 5 | -------------------------------------------------------------------------------- /optd-cost-model/src/stats/utilities/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod counter; 2 | pub mod simple_map; 3 | pub mod tdigest; 4 | -------------------------------------------------------------------------------- /optd-persistent/src/db/init.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cmu-db/optd-experimental/HEAD/optd-persistent/src/db/init.db -------------------------------------------------------------------------------- /optd-cost-model/src/common/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod nodes; 2 | pub mod predicates; 3 | pub mod properties; 4 | pub mod types; 5 | pub mod values; 6 | -------------------------------------------------------------------------------- /optd-cost-model/src/cost/filter/mod.rs: 
-------------------------------------------------------------------------------- 1 | pub mod attribute; 2 | pub mod comp_op; 3 | pub mod constant; 4 | pub mod core; 5 | pub mod in_list; 6 | pub mod like; 7 | pub mod log_op; 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # These are backup files generated by rustfmt 7 | **/*.rs.bk 8 | 9 | **/*.db 10 | !init.db 11 | 12 | .DS_Store 13 | 14 | optd-persistent/sql/ 15 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod attr_index_pred; 2 | pub mod bin_op_pred; 3 | pub mod cast_pred; 4 | pub mod constant_pred; 5 | pub mod data_type_pred; 6 | pub mod func_pred; 7 | pub mod in_list_pred; 8 | pub mod like_pred; 9 | pub mod list_pred; 10 | pub mod log_op_pred; 11 | pub mod sort_order_pred; 12 | pub mod un_op_pred; 13 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/sort_order_pred.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | /// TODO: documentation 4 | #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 5 | pub enum SortOrderType { 6 | Asc, 7 | Desc, 8 | } 9 | 10 | impl Display for SortOrderType { 11 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 12 | write!(f, "{:?}", self) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /optd-persistent/src/bin/migrate.rs: -------------------------------------------------------------------------------- 1 | use optd_persistent::{migrate, DATABASE_FILENAME, DATABASE_URL}; 2 | 
use sea_orm::*; 3 | use sea_orm_migration::prelude::*; 4 | 5 | #[tokio::main] 6 | async fn main() { 7 | let _ = std::fs::remove_file(DATABASE_FILENAME); 8 | 9 | let db = Database::connect(DATABASE_URL) 10 | .await 11 | .expect("Unable to connect to the database"); 12 | 13 | migrate(&db) 14 | .await 15 | .expect("Something went wrong during migration"); 16 | } 17 | -------------------------------------------------------------------------------- /optd-cost-model/src/stats/utilities/simple_map.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::hash::Hash; 3 | 4 | use serde::{Deserialize, Serialize}; 5 | 6 | use crate::common::values::Value; 7 | 8 | /// TODO: documentation 9 | /// Now it is mainly for testing purposes. 10 | #[derive(Clone, Serialize, Deserialize, Debug, Default)] 11 | pub struct SimpleMap { 12 | pub(crate) m: HashMap, 13 | } 14 | 15 | impl SimpleMap { 16 | pub fn new(v: Vec<(K, f64)>) -> Self { 17 | Self { 18 | m: v.into_iter().collect(), 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /optd-persistent/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-persistent" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Sarvesh Tandon", "Connor Tsui"] 6 | 7 | [dependencies] 8 | sea-orm = { version = "1.1.1", features = [ 9 | "sqlx-sqlite", 10 | "runtime-tokio-rustls", 11 | "macros", 12 | "debug-print", 13 | "with-json", 14 | ] } 15 | sea-orm-migration = "1.0.0" 16 | serde_json = "1.0.118" # Hash implementation on serde_json::Value 17 | tokio = { version = "1.0.1", features = ["macros", "rt-multi-thread"] } 18 | trait-variant = "0.1.2" 19 | 20 | # Pin more recent versions for -Zminimal-versions 21 | async-trait = "0.1.43" 22 | async-stream = "0.3.1" 23 | strum = "0.26.1" 24 | num_enum = "0.7.3" 25 | 
-------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/func_pred.rs: -------------------------------------------------------------------------------- 1 | /// TODO: documentation 2 | #[derive(Clone, PartialEq, Eq, Hash, Debug)] 3 | pub enum FuncType { 4 | Scalar(datafusion_expr::BuiltinScalarFunction), 5 | Agg(datafusion_expr::AggregateFunction), 6 | Case, 7 | } 8 | 9 | impl std::fmt::Display for FuncType { 10 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 11 | write!(f, "{:?}", self) 12 | } 13 | } 14 | 15 | impl FuncType { 16 | pub fn new_scalar(func_id: datafusion_expr::BuiltinScalarFunction) -> Self { 17 | FuncType::Scalar(func_id) 18 | } 19 | 20 | pub fn new_agg(func_id: datafusion_expr::AggregateFunction) -> Self { 21 | FuncType::Agg(func_id) 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /optd-cost-model/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "optd-cost-model" 3 | version = "0.1.0" 4 | edition = "2021" 5 | authors = ["Yuanxin Cao", "Lan Lou", "Kunle Li"] 6 | 7 | [dependencies] 8 | optd-persistent = { path = "../optd-persistent", version = "0.1" } 9 | serde = { version = "1.0", features = ["derive"] } 10 | serde_json = "1.0" 11 | serde_with = { version = "3.7.0", features = ["json"] } 12 | arrow-schema = "53.2.0" 13 | datafusion-expr = "32.0.0" 14 | datafusion = "32.0.0" 15 | ordered-float = "4.0" 16 | chrono = "0.4" 17 | itertools = "0.13" 18 | assert_approx_eq = "1.1.0" 19 | trait-variant = "0.1.2" 20 | tokio = { version = "1.0.1", features = ["macros", "rt-multi-thread"] } 21 | async-trait = "0.1" 22 | 23 | [dev-dependencies] 24 | crossbeam = "0.8" 25 | rand = "0.8" 26 | test-case = "3.3" 27 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/database_metadata.rs: 
-------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "database_metadata")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub name: String, 11 | pub creation_time: DateTimeUtc, 12 | } 13 | 14 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 15 | pub enum Relation { 16 | #[sea_orm(has_many = "super::namespace_metadata::Entity")] 17 | NamespaceMetadata, 18 | } 19 | 20 | impl Related for Entity { 21 | fn to() -> RelationDef { 22 | Relation::NamespaceMetadata.def() 23 | } 24 | } 25 | 26 | impl ActiveModelBehavior for ActiveModel {} 27 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/cost_model/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod m20241029_000001_event; 2 | pub(crate) mod m20241029_000001_physical_expression_to_statistic_junction; 3 | pub(crate) mod m20241029_000001_plan_cost; 4 | pub(crate) mod m20241029_000001_statistic; 5 | pub(crate) mod m20241029_000001_statistic_to_attribute_junction; 6 | pub(crate) mod m20241029_000001_versioned_statistic; 7 | 8 | pub(crate) use m20241029_000001_event as event; 9 | pub(crate) use m20241029_000001_physical_expression_to_statistic_junction as physical_expression_to_statistic_junction; 10 | pub(crate) use m20241029_000001_plan_cost as plan_cost; 11 | pub(crate) use m20241029_000001_statistic as statistic; 12 | pub(crate) use m20241029_000001_statistic_to_attribute_junction as statistic_to_attribute_junction; 13 | pub(crate) use m20241029_000001_versioned_statistic as versioned_statistic; 14 | -------------------------------------------------------------------------------- /.github/dependabot.yml: 
-------------------------------------------------------------------------------- 1 | # Taken from https://github.com/jonhoo/rust-ci-conf/blob/main/.github/dependabot.yml 2 | 3 | version: 2 4 | updates: 5 | - package-ecosystem: github-actions 6 | directory: /optd-persistent 7 | schedule: 8 | interval: daily 9 | - package-ecosystem: cargo 10 | directory: /optd-persistent 11 | schedule: 12 | interval: daily 13 | ignore: 14 | - dependency-name: "*" 15 | # patch and minor updates don't matter for libraries as consumers of this library build 16 | # with their own lockfile, rather than the version specified in this library's lockfile 17 | # remove this ignore rule if your package has binaries to ensure that the binaries are 18 | # built with the exact set of dependencies and those are up to date. 19 | update-types: 20 | - "version-update:semver-patch" 21 | - "version-update:semver-minor" -------------------------------------------------------------------------------- /optd-cost-model/src/cost/limit.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | common::{ 3 | nodes::{ArcPredicateNode, ReprPredicateNode}, 4 | predicates::constant_pred::ConstantPred, 5 | }, 6 | cost_model::CostModelImpl, 7 | storage::CostModelStorageManager, 8 | CostModelResult, EstimatedStatistic, 9 | }; 10 | 11 | impl CostModelImpl { 12 | pub(crate) fn get_limit_row_cnt( 13 | &self, 14 | child_row_cnt: EstimatedStatistic, 15 | fetch_expr: ArcPredicateNode, 16 | ) -> CostModelResult { 17 | let fetch = ConstantPred::from_pred_node(fetch_expr) 18 | .unwrap() 19 | .value() 20 | .as_u64(); 21 | // u64::MAX represents None 22 | if fetch == u64::MAX { 23 | Ok(child_row_cnt) 24 | } else { 25 | Ok(EstimatedStatistic(child_row_cnt.0.min(fetch as f64))) 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/logical_property.rs: 
-------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "logical_property")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub group_id: i32, 11 | pub variant_tag: i16, 12 | pub data: Json, 13 | } 14 | 15 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 16 | pub enum Relation { 17 | #[sea_orm( 18 | belongs_to = "super::cascades_group::Entity", 19 | from = "Column::GroupId", 20 | to = "super::cascades_group::Column::Id", 21 | on_update = "Cascade", 22 | on_delete = "Cascade" 23 | )] 24 | CascadesGroup, 25 | } 26 | 27 | impl Related for Entity { 28 | fn to() -> RelationDef { 29 | Relation::CascadesGroup.def() 30 | } 31 | } 32 | 33 | impl ActiveModelBehavior for ActiveModel {} 34 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/mod.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | pub mod prelude; 4 | 5 | pub mod attribute; 6 | pub mod attribute_constraint_junction; 7 | pub mod attribute_foreign_constraint_junction; 8 | pub mod cascades_group; 9 | pub mod constraint_metadata; 10 | pub mod database_metadata; 11 | pub mod event; 12 | pub mod group_winner; 13 | pub mod index_metadata; 14 | pub mod logical_children; 15 | pub mod logical_expression; 16 | pub mod logical_property; 17 | pub mod namespace_metadata; 18 | pub mod physical_children; 19 | pub mod physical_expression; 20 | pub mod physical_expression_to_statistic_junction; 21 | pub mod physical_property; 22 | pub mod plan_cost; 23 | pub mod predicate; 24 | pub mod predicate_children; 25 | pub mod predicate_logical_expression_junction; 26 | pub mod predicate_physical_expression_junction; 27 | pub mod statistic; 28 | pub mod statistic_to_attribute_junction; 29 | pub mod table_metadata; 30 | pub mod trigger; 31 | pub mod versioned_statistic; 32 | -------------------------------------------------------------------------------- /optd-cost-model/src/storage/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | common::types::{EpochId, ExprId, TableId}, 3 | stats::AttributeCombValueStats, 4 | Cost, CostModelResult, EstimatedStatistic, 5 | }; 6 | 7 | pub mod mock; 8 | pub mod persistent; 9 | 10 | #[trait_variant::make(Send)] 11 | pub trait CostModelStorageManager { 12 | async fn get_attributes_comb_statistics( 13 | &self, 14 | table_id: TableId, 15 | attr_base_indices: &[u64], 16 | ) -> CostModelResult>; 17 | 18 | async fn get_table_row_count(&self, table_id: TableId) -> CostModelResult>; 19 | 20 | async fn get_cost( 21 | &self, 22 | expr_id: ExprId, 23 | ) -> CostModelResult<(Option, Option)>; 24 | 25 | async fn store_cost( 26 | &self, 27 | expr_id: ExprId, 28 | cost: Option, 29 | estimated_statistic: Option, 30 | epoch_id: Option, 31 | ) -> CostModelResult<()>; 32 | } 33 | 
-------------------------------------------------------------------------------- /optd-persistent/src/entities/physical_property.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "physical_property")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub physical_expression_id: i32, 11 | pub variant_tag: i16, 12 | pub data: Json, 13 | } 14 | 15 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 16 | pub enum Relation { 17 | #[sea_orm( 18 | belongs_to = "super::physical_expression::Entity", 19 | from = "Column::PhysicalExpressionId", 20 | to = "super::physical_expression::Column::Id", 21 | on_update = "Cascade", 22 | on_delete = "Cascade" 23 | )] 24 | PhysicalExpression, 25 | } 26 | 27 | impl Related for Entity { 28 | fn to() -> RelationDef { 29 | Relation::PhysicalExpression.def() 30 | } 31 | } 32 | 33 | impl ActiveModelBehavior for ActiveModel {} 34 | -------------------------------------------------------------------------------- /optd-persistent/README.md: -------------------------------------------------------------------------------- 1 | # Generate the `entities` module 2 | 3 | To make changes to the database tables and schema, you will have to modify files in the `migrator` module and then update the `entities` module using `sea-orm-cli`. 4 | 5 | This assumes that you already have the `sqlite3` binary installed. 
First, make sure you have installed `sea-orm-cli`: 6 | 7 | ```sh 8 | $ cargo install sea-orm-cli 9 | ``` 10 | 11 | Make sure your working directory is in the crate root: 12 | 13 | ```sh 14 | $ cd optd-persistent 15 | ``` 16 | 17 | If you have not generated the `sqlite.db` file yet, you will need to run this command which will generate the `sqlite.db` file and run all of the migrations: 18 | 19 | ```sh 20 | $ cargo run --bin migrate 21 | ``` 22 | 23 | Finally, run this command to generate / overwrite the `entities` module in the `src` directory. 24 | 25 | ``` 26 | $ sea-orm-cli generate entity -u sqlite:./sqlite.db -o src/entities 27 | ``` 28 | 29 | # Demo 30 | 31 | To run the demo, run the root binary crate: 32 | 33 | ```sh 34 | $ cargo run --bin optd-persistent 35 | ``` 36 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/properties/schema.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | use super::Attribute; 6 | 7 | /// [`Schema`] represents the schema of a group in the memo. It contains a list of attributes. 
8 | #[derive(Clone, Debug, Serialize, Deserialize)] 9 | pub struct Schema { 10 | pub attributes: Vec, 11 | } 12 | 13 | impl std::fmt::Display for Schema { 14 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 15 | write!( 16 | f, 17 | "[{}]", 18 | self.attributes.iter().map(|x| x.to_string()).join(", ") 19 | ) 20 | } 21 | } 22 | 23 | impl Schema { 24 | pub fn new(attributes: Vec) -> Self { 25 | Self { attributes } 26 | } 27 | 28 | pub fn len(&self) -> usize { 29 | self.attributes.len() 30 | } 31 | 32 | pub fn is_empty(&self) -> bool { 33 | self.len() == 0 34 | } 35 | } 36 | 37 | impl From> for Schema { 38 | fn from(attributes: Vec) -> Self { 39 | Self::new(attributes) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/properties/mod.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | use super::predicates::constant_pred::ConstantType; 4 | 5 | pub mod attr_ref; 6 | pub mod schema; 7 | 8 | #[derive(Clone, Debug, Serialize, Deserialize)] 9 | pub struct Attribute { 10 | pub name: String, 11 | pub typ: ConstantType, 12 | pub nullable: bool, 13 | } 14 | 15 | impl std::fmt::Display for Attribute { 16 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 17 | if self.nullable { 18 | write!(f, "{}:{:?}", self.name, self.typ) 19 | } else { 20 | write!(f, "{}:{:?}(non-null)", self.name, self.typ) 21 | } 22 | } 23 | } 24 | 25 | impl Attribute { 26 | pub fn new(name: String, typ: ConstantType, nullable: bool) -> Self { 27 | Self { 28 | name, 29 | typ, 30 | nullable, 31 | } 32 | } 33 | 34 | pub fn new_non_null_int64(name: String) -> Self { 35 | Self { 36 | name, 37 | typ: ConstantType::Int64, 38 | nullable: false, 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/predicate_children.rs: 
-------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "predicate_children")] 7 | pub struct Model { 8 | #[sea_orm(primary_key, auto_increment = false)] 9 | pub parent_id: i32, 10 | #[sea_orm(primary_key, auto_increment = false)] 11 | pub child_id: i32, 12 | } 13 | 14 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 15 | pub enum Relation { 16 | #[sea_orm( 17 | belongs_to = "super::predicate::Entity", 18 | from = "Column::ChildId", 19 | to = "super::predicate::Column::Id", 20 | on_update = "Cascade", 21 | on_delete = "Cascade" 22 | )] 23 | Predicate2, 24 | #[sea_orm( 25 | belongs_to = "super::predicate::Entity", 26 | from = "Column::ParentId", 27 | to = "super::predicate::Column::Id", 28 | on_update = "Cascade", 29 | on_delete = "Cascade" 30 | )] 31 | Predicate1, 32 | } 33 | 34 | impl ActiveModelBehavior for ActiveModel {} 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 CMU Database Group 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/m20241029_000001_database_metadata.rs: -------------------------------------------------------------------------------- 1 | use sea_orm_migration::{prelude::*, schema::*}; 2 | 3 | #[derive(Iden)] 4 | pub enum DatabaseMetadata { 5 | Table, 6 | Id, 7 | Name, 8 | CreationTime, 9 | } 10 | 11 | #[derive(DeriveMigrationName)] 12 | pub struct Migration; 13 | 14 | #[async_trait::async_trait] 15 | impl MigrationTrait for Migration { 16 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 17 | manager 18 | .create_table( 19 | Table::create() 20 | .table(DatabaseMetadata::Table) 21 | .if_not_exists() 22 | .col(pk_auto(DatabaseMetadata::Id)) 23 | .col(string(DatabaseMetadata::Name)) 24 | .col(timestamp(DatabaseMetadata::CreationTime)) 25 | .to_owned(), 26 | ) 27 | .await 28 | } 29 | 30 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 31 | manager 32 | .drop_table(Table::drop().table(DatabaseMetadata::Table).to_owned()) 33 | .await 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/data_type_pred.rs: -------------------------------------------------------------------------------- 1 | use arrow_schema::DataType; 2 | 3 | use crate::common::nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}; 4 | 5 | #[derive(Clone, 
Debug)] 6 | pub struct DataTypePred(pub ArcPredicateNode); 7 | 8 | impl DataTypePred { 9 | pub fn new(typ: DataType) -> Self { 10 | DataTypePred( 11 | PredicateNode { 12 | typ: PredicateType::DataType(typ), 13 | children: vec![], 14 | data: None, 15 | } 16 | .into(), 17 | ) 18 | } 19 | 20 | pub fn data_type(&self) -> DataType { 21 | if let PredicateType::DataType(ref data_type) = self.0.typ { 22 | data_type.clone() 23 | } else { 24 | panic!("not a data type") 25 | } 26 | } 27 | } 28 | 29 | impl ReprPredicateNode for DataTypePred { 30 | fn into_pred_node(self) -> ArcPredicateNode { 31 | self.0 32 | } 33 | 34 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 35 | if !matches!(pred_node.typ, PredicateType::DataType(_)) { 36 | return None; 37 | } 38 | Some(Self(pred_node)) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod m20241029_000001_attribute; 2 | pub(crate) mod m20241029_000001_attribute_constraint_junction; 3 | pub(crate) mod m20241029_000001_attribute_foreign_constraint_junction; 4 | pub(crate) mod m20241029_000001_constraint_metadata; 5 | pub(crate) mod m20241029_000001_database_metadata; 6 | pub(crate) mod m20241029_000001_index_metadata; 7 | pub(crate) mod m20241029_000001_namespace_metadata; 8 | pub(crate) mod m20241029_000001_table_metadata; 9 | pub(crate) mod m20241029_000001_trigger; 10 | 11 | pub(crate) use m20241029_000001_attribute as attribute; 12 | pub(crate) use m20241029_000001_attribute_constraint_junction as attribute_constraint_junction; 13 | pub(crate) use m20241029_000001_attribute_foreign_constraint_junction as attribute_foreign_constraint_junction; 14 | pub(crate) use m20241029_000001_constraint_metadata as constraint_metadata; 15 | pub(crate) use m20241029_000001_database_metadata as database_metadata; 16 | pub(crate) use 
m20241029_000001_index_metadata as index_metadata;
pub(crate) use m20241029_000001_namespace_metadata as namespace_metadata;
pub(crate) use m20241029_000001_table_metadata as table_metadata;
pub(crate) use m20241029_000001_trigger as trigger;

--------------------------------------------------------------------------------
/optd-persistent/src/entities/trigger.rs:
--------------------------------------------------------------------------------
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `trigger` table.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "trigger")]
pub struct Model {
    #[sea_orm(primary_key)]
    pub id: i32,
    pub name: String,
    /// References `table_metadata.id` (see [`Relation::TableMetadata`]).
    pub table_id: i32,
    /// References another row of this same table (see [`Relation::SelfRef`]).
    // NOTE(review): the column is not nullable, so a trigger without a parent
    // cannot be expressed directly -- confirm this is intended.
    pub parent_trigger_id: i32,
    pub function: Json,
}

/// Foreign-key relations of `trigger`; both cascade on update and delete.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::table_metadata::Entity",
        from = "Column::TableId",
        to = "super::table_metadata::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    TableMetadata,
    #[sea_orm(
        belongs_to = "Entity",
        from = "Column::ParentTriggerId",
        to = "Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    SelfRef,
}

// The target entity is the one `Relation::TableMetadata` points at.
impl Related<super::table_metadata::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::TableMetadata.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/migrator/memo/m20241029_000001_predicate.rs:
--------------------------------------------------------------------------------
/*
Table predicate {
  id integer [pk]
  data json
  variant integer
}
*/

use sea_orm_migration::{
    prelude::*,
    schema::{integer, json, pk_auto},
};

/// Identifiers for the `predicate` table and its columns.
#[derive(Iden)]
pub enum Predicate {
    Table,
    Id,
    Data,
    Variant,
}

#[derive(DeriveMigrationName)]
pub struct Migration;

#[async_trait::async_trait]
impl MigrationTrait for Migration {
    /// Creates the `predicate` table (no-op if it already exists).
    async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
        manager
            .create_table(
                Table::create()
                    .table(Predicate::Table)
                    .if_not_exists()
                    .col(pk_auto(Predicate::Id))
                    .col(json(Predicate::Data))
                    .col(integer(Predicate::Variant))
                    .to_owned(),
            )
            .await
    }

    /// Drops the `predicate` table.
    async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
        manager
            .drop_table(Table::drop().table(Predicate::Table).to_owned())
            .await
    }
}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/namespace_metadata.rs:
--------------------------------------------------------------------------------
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `namespace_metadata` table.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "namespace_metadata")]
pub struct Model {
    #[sea_orm(primary_key)]
    pub id: i32,
    pub name: String,
    /// References `database_metadata.id` (see [`Relation::DatabaseMetadata`]).
    pub database_id: i32,
    pub creation_time: DateTimeUtc,
}

/// Relations of `namespace_metadata`: it belongs to one database and has many tables.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::database_metadata::Entity",
        from = "Column::DatabaseId",
        to = "super::database_metadata::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    DatabaseMetadata,
    #[sea_orm(has_many = "super::table_metadata::Entity")]
    TableMetadata,
}

impl Related<super::database_metadata::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::DatabaseMetadata.def()
    }
}

impl Related<super::table_metadata::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::TableMetadata.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/migrator/cost_model/m20241029_000001_event.rs:
--------------------------------------------------------------------------------
//! Every time we insert/update statistics, we need to insert a new
//! row into this table to record the event.

use sea_orm_migration::{prelude::*, schema::*};

/// Identifiers for the `event` table and its columns.
#[derive(Iden)]
pub enum Event {
    Table,
    EpochId,
    Timestamp,
    SourceVariant,
    Data,
}

#[derive(DeriveMigrationName)]
pub struct Migration;

#[async_trait::async_trait]
impl MigrationTrait for Migration {
    /// Creates the `event` table (no-op if it already exists); `epoch_id` is the
    /// auto-incrementing primary key.
    async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
        manager
            .create_table(
                Table::create()
                    .table(Event::Table)
                    .if_not_exists()
                    .col(pk_auto(Event::EpochId))
                    .col(timestamp(Event::Timestamp))
                    .col(string(Event::SourceVariant))
                    .col(json(Event::Data))
                    .to_owned(),
            )
            .await
    }

    /// Drops the `event` table.
    async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
        manager
            .drop_table(Table::drop().table(Event::Table).to_owned())
            .await
    }
}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/event.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "event")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub epoch_id: i32, 10 | pub timestamp: DateTimeUtc, 11 | pub source_variant: String, 12 | pub data: Json, 13 | } 14 | 15 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 16 | pub enum Relation { 17 | #[sea_orm(has_many = "super::group_winner::Entity")] 18 | GroupWinner, 19 | #[sea_orm(has_many = "super::plan_cost::Entity")] 20 | PlanCost, 21 | #[sea_orm(has_many = "super::versioned_statistic::Entity")] 22 | VersionedStatistic, 23 | } 24 | 25 | impl Related for Entity { 26 | fn to() -> RelationDef { 27 | Relation::GroupWinner.def() 28 | } 29 | } 30 | 31 | impl Related for Entity { 32 | fn to() -> RelationDef { 33 | Relation::PlanCost.def() 34 | } 35 | } 36 | 37 | impl Related for Entity { 38 | fn to() -> RelationDef { 39 | Relation::VersionedStatistic.def() 40 | } 41 | } 42 | 43 | impl ActiveModelBehavior for ActiveModel {} 44 | -------------------------------------------------------------------------------- /optd-cost-model/src/cost/filter/log_op.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | common::{nodes::ArcPredicateNode, predicates::log_op_pred::LogOpType, types::GroupId}, 3 | cost_model::CostModelImpl, 4 | storage::CostModelStorageManager, 5 | CostModelResult, 6 | }; 7 | 8 | impl CostModelImpl { 9 | pub(crate) async fn get_log_op_selectivity( 10 | &self, 11 | group_id: GroupId, 12 | log_op_typ: LogOpType, 13 | children: &[ArcPredicateNode], 14 | ) -> CostModelResult { 15 | match log_op_typ { 16 | LogOpType::And => { 17 | let mut and_sel = 1.0; 18 | for child in children { 19 | let selectivity = self.get_filter_selectivity(group_id, child.clone()).await?; 20 | and_sel *= selectivity; 21 | } 22 | Ok(and_sel) 23 | } 24 | LogOpType::Or => { 25 | let 
mut or_sel_neg = 1.0; 26 | for child in children { 27 | let selectivity = self.get_filter_selectivity(group_id, child.clone()).await?; 28 | or_sel_neg *= 1.0 - selectivity; 29 | } 30 | Ok(1.0 - or_sel_neg) 31 | } 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/list_pred.rs: -------------------------------------------------------------------------------- 1 | use crate::common::nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}; 2 | 3 | #[derive(Clone, Debug)] 4 | pub struct ListPred(pub ArcPredicateNode); 5 | 6 | impl ListPred { 7 | pub fn new(preds: Vec) -> Self { 8 | ListPred( 9 | PredicateNode { 10 | typ: PredicateType::List, 11 | children: preds, 12 | data: None, 13 | } 14 | .into(), 15 | ) 16 | } 17 | 18 | /// Gets number of expressions in the list 19 | pub fn len(&self) -> usize { 20 | self.0.children.len() 21 | } 22 | 23 | pub fn is_empty(&self) -> bool { 24 | self.0.children.is_empty() 25 | } 26 | 27 | pub fn child(&self, idx: usize) -> ArcPredicateNode { 28 | self.0.child(idx) 29 | } 30 | 31 | pub fn to_vec(&self) -> Vec { 32 | self.0.children.clone() 33 | } 34 | } 35 | 36 | impl ReprPredicateNode for ListPred { 37 | fn into_pred_node(self) -> ArcPredicateNode { 38 | self.0 39 | } 40 | 41 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 42 | if pred_node.typ != PredicateType::List { 43 | return None; 44 | } 45 | Some(Self(pred_node)) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/attr_index_pred.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}, 3 | values::Value, 4 | }; 5 | 6 | /// [`AttributeIndexPred`] represents the position of an attribute in a schema or 7 | /// [`GroupAttrRefs`]. 
8 | /// 9 | /// The `data` field holds the index of the attribute in the schema or [`GroupAttrRefs`]. 10 | #[derive(Clone, Debug)] 11 | pub struct AttrIndexPred(pub ArcPredicateNode); 12 | 13 | impl AttrIndexPred { 14 | pub fn new(attr_idx: u64) -> AttrIndexPred { 15 | AttrIndexPred( 16 | PredicateNode { 17 | typ: PredicateType::AttrIndex, 18 | children: vec![], 19 | data: Some(Value::UInt64(attr_idx)), 20 | } 21 | .into(), 22 | ) 23 | } 24 | 25 | /// Gets the attribute index. 26 | pub fn attr_index(&self) -> u64 { 27 | self.0.data.as_ref().unwrap().as_u64() 28 | } 29 | } 30 | 31 | impl ReprPredicateNode for AttrIndexPred { 32 | fn into_pred_node(self) -> ArcPredicateNode { 33 | self.0 34 | } 35 | 36 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 37 | if pred_node.typ != PredicateType::AttrIndex { 38 | return None; 39 | } 40 | Some(Self(pred_node)) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/versioned_statistic.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "versioned_statistic")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub epoch_id: i32, 11 | pub statistic_id: i32, 12 | pub statistic_value: Json, 13 | } 14 | 15 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 16 | pub enum Relation { 17 | #[sea_orm( 18 | belongs_to = "super::event::Entity", 19 | from = "Column::EpochId", 20 | to = "super::event::Column::EpochId", 21 | on_update = "Cascade", 22 | on_delete = "Cascade" 23 | )] 24 | Event, 25 | #[sea_orm( 26 | belongs_to = "super::statistic::Entity", 27 | from = "Column::StatisticId", 28 | to = "super::statistic::Column::Id", 29 | on_update = "Cascade", 30 | on_delete = "Cascade" 31 | )] 32 | Statistic, 33 | } 34 | 35 | impl Related for Entity { 36 | fn to() -> RelationDef { 37 | Relation::Event.def() 38 | } 39 | } 40 | 41 | impl Related for Entity { 42 | fn to() -> RelationDef { 43 | Relation::Statistic.def() 44 | } 45 | } 46 | 47 | impl ActiveModelBehavior for ActiveModel {} 48 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/in_list_pred.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}, 3 | values::Value, 4 | }; 5 | 6 | use super::list_pred::ListPred; 7 | 8 | #[derive(Clone, Debug)] 9 | pub struct InListPred(pub ArcPredicateNode); 10 | 11 | impl InListPred { 12 | pub fn new(child: ArcPredicateNode, list: ListPred, negated: bool) -> Self { 13 | InListPred( 14 | PredicateNode { 15 | typ: PredicateType::InList, 16 | children: vec![child, list.into_pred_node()], 17 | data: Some(Value::Bool(negated)), 18 | } 19 | .into(), 20 | ) 21 | } 22 | 23 | pub fn child(&self) -> ArcPredicateNode { 
24 | self.0.child(0) 25 | } 26 | 27 | pub fn list(&self) -> ListPred { 28 | ListPred::from_pred_node(self.0.child(1)).unwrap() 29 | } 30 | 31 | /// `true` for `NOT IN`. 32 | pub fn negated(&self) -> bool { 33 | self.0.data.as_ref().unwrap().as_bool() 34 | } 35 | } 36 | 37 | impl ReprPredicateNode for InListPred { 38 | fn into_pred_node(self) -> ArcPredicateNode { 39 | self.0 40 | } 41 | 42 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 43 | if !matches!(pred_node.typ, PredicateType::InList) { 44 | return None; 45 | } 46 | Some(Self(pred_node)) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /optd-cost-model/src/memo_ext.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | properties::{ 3 | attr_ref::{AttrRef, GroupAttrRefs}, 4 | schema::Schema, 5 | Attribute, 6 | }, 7 | types::GroupId, 8 | }; 9 | 10 | /// [`MemoExt`] is a trait that provides methods to access the schema, column reference, and attribute 11 | /// information of a group in the memo. The information are used by the cost model to compute the cost of 12 | /// an expression. 13 | /// 14 | /// [`MemoExt`] should be implemented by the optimizer core to provide the necessary information to the cost 15 | /// model. All information required here is already present in the memo, so the optimizer core should be able 16 | /// to implement this trait without additional work. 17 | pub trait MemoExt: Send + Sync + 'static { 18 | /// Get the schema of a group in the memo. 19 | fn get_schema(&self, group_id: GroupId) -> Schema; 20 | /// Get the attribute info of a given attribute in a group in the memo. 21 | fn get_attribute_info(&self, group_id: GroupId, attr_ref_idx: u64) -> Attribute; 22 | /// Get the attribute reference of a group in the memo. 
23 | fn get_attribute_refs(&self, group_id: GroupId) -> GroupAttrRefs; 24 | /// Get the attribute reference of a given attribute in a group in the memo. 25 | fn get_attribute_ref(&self, group_id: GroupId, attr_ref_idx: u64) -> AttrRef; 26 | 27 | // TODO: Figure out what other information is needed to compute the cost... 28 | } 29 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/statistic_to_attribute_junction.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "statistic_to_attribute_junction")] 7 | pub struct Model { 8 | #[sea_orm(primary_key, auto_increment = false)] 9 | pub statistic_id: i32, 10 | #[sea_orm(primary_key, auto_increment = false)] 11 | pub attribute_id: i32, 12 | } 13 | 14 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 15 | pub enum Relation { 16 | #[sea_orm( 17 | belongs_to = "super::attribute::Entity", 18 | from = "Column::AttributeId", 19 | to = "super::attribute::Column::Id", 20 | on_update = "Cascade", 21 | on_delete = "Cascade" 22 | )] 23 | Attribute, 24 | #[sea_orm( 25 | belongs_to = "super::statistic::Entity", 26 | from = "Column::StatisticId", 27 | to = "super::statistic::Column::Id", 28 | on_update = "Cascade", 29 | on_delete = "Cascade" 30 | )] 31 | Statistic, 32 | } 33 | 34 | impl Related for Entity { 35 | fn to() -> RelationDef { 36 | Relation::Attribute.def() 37 | } 38 | } 39 | 40 | impl Related for Entity { 41 | fn to() -> RelationDef { 42 | Relation::Statistic.def() 43 | } 44 | } 45 | 46 | impl ActiveModelBehavior for ActiveModel {} 47 | -------------------------------------------------------------------------------- /optd-cost-model/src/cost/filter/constant.rs: 
--------------------------------------------------------------------------------
use crate::{
    common::{
        nodes::{ArcPredicateNode, PredicateType},
        predicates::constant_pred::ConstantType,
        values::Value,
    },
    cost_model::CostModelImpl,
    storage::CostModelStorageManager,
};

// NOTE(review): the generic parameter list was lost in extraction; it is restored
// here from the otherwise-unused `CostModelStorageManager` import -- confirm
// against the definition of `CostModelImpl`.
impl<S: CostModelStorageManager> CostModelImpl<S> {
    /// Returns the selectivity of a boolean constant: `1.0` for `true`, `0.0` for `false`.
    ///
    /// # Panics
    ///
    /// Panics if `const_node` is not a constant, is a constant of a non-bool type,
    /// or carries no `data`.
    pub(crate) fn get_constant_selectivity(const_node: ArcPredicateNode) -> f64 {
        match const_node.typ {
            PredicateType::Constant(ConstantType::Bool) => {
                let value = const_node
                    .data
                    .as_ref()
                    .expect("constants should have data");
                match value {
                    Value::Bool(true) => 1.0,
                    Value::Bool(false) => 0.0,
                    _ => unreachable!(
                        "if the typ is ConstantType::Bool, the value should be a Value::Bool"
                    ),
                }
            }
            PredicateType::Constant(_) => {
                panic!("selectivity is not defined on constants which are not bools")
            }
            _ => panic!("get_constant_selectivity must be called on a constant"),
        }
    }
}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/logical_children.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `logical_children` table; the two columns together form the
/// (non-auto-incrementing) primary key.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "logical_children")]
pub struct Model {
    #[sea_orm(primary_key, auto_increment = false)]
    pub logical_expression_id: i32,
    #[sea_orm(primary_key, auto_increment = false)]
    pub group_id: i32,
}

/// Foreign-key relations of `logical_children`; both cascade on update and delete.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::cascades_group::Entity",
        from = "Column::GroupId",
        to = "super::cascades_group::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    CascadesGroup,
    // Joins on `logical_expression_id`, mirroring `physical_children`. The previous
    // definition joined `group_id` against `logical_expression.id`.
    // NOTE(review): this file is generated from the schema, so the foreign key in
    // the `logical_children` migration presumably has the same mistake and should
    // be corrected alongside this change -- verify.
    #[sea_orm(
        belongs_to = "super::logical_expression::Entity",
        from = "Column::LogicalExpressionId",
        to = "super::logical_expression::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    LogicalExpression,
}

impl Related<super::cascades_group::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::CascadesGroup.def()
    }
}

impl Related<super::logical_expression::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::LogicalExpression.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/index_metadata.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `index_metadata` table.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "index_metadata")]
pub struct Model {
    #[sea_orm(primary_key)]
    pub id: i32,
    /// References `table_metadata.id` (see [`Relation::TableMetadata`]).
    pub table_id: i32,
    pub name: String,
    pub number_of_attributes: i32,
    pub variant_tag: i32,
    pub is_unique: bool,
    pub nulls_not_distinct: bool,
    pub is_primary: bool,
    pub is_clustered: bool,
    pub is_exclusion: bool,
    pub description: String,
}

/// Relations of `index_metadata`: it has many constraints and belongs to one table.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(has_many = "super::constraint_metadata::Entity")]
    ConstraintMetadata,
    #[sea_orm(
        belongs_to = "super::table_metadata::Entity",
        from = "Column::TableId",
        to = "super::table_metadata::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    TableMetadata,
}

impl Related<super::constraint_metadata::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::ConstraintMetadata.def()
    }
}

impl Related<super::table_metadata::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::TableMetadata.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/attribute_constraint_junction.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `attribute_constraint_junction` table; the two columns together
/// form the (non-auto-incrementing) primary key.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "attribute_constraint_junction")]
pub struct Model {
    #[sea_orm(primary_key, auto_increment = false)]
    pub attribute_id: i32,
    #[sea_orm(primary_key, auto_increment = false)]
    pub constraint_id: i32,
}

/// Foreign-key relations of the junction; both cascade on update and delete.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::attribute::Entity",
        from = "Column::AttributeId",
        to = "super::attribute::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    Attribute,
    #[sea_orm(
        belongs_to = "super::constraint_metadata::Entity",
        from = "Column::ConstraintId",
        to = "super::constraint_metadata::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    ConstraintMetadata,
}

impl Related<super::attribute::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::Attribute.def()
    }
}

impl Related<super::constraint_metadata::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::ConstraintMetadata.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/physical_children.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `physical_children` table; the two columns together form the
/// (non-auto-incrementing) primary key.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "physical_children")]
pub struct Model {
    #[sea_orm(primary_key, auto_increment = false)]
    pub physical_expression_id: i32,
    #[sea_orm(primary_key, auto_increment = false)]
    pub group_id: i32,
}

/// Foreign-key relations of `physical_children`; both cascade on update and delete.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::cascades_group::Entity",
        from = "Column::GroupId",
        to = "super::cascades_group::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    CascadesGroup,
    #[sea_orm(
        belongs_to = "super::physical_expression::Entity",
        from = "Column::PhysicalExpressionId",
        to = "super::physical_expression::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    PhysicalExpression,
}

impl Related<super::cascades_group::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::CascadesGroup.def()
    }
}

impl Related<super::physical_expression::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::PhysicalExpression.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/attribute_foreign_constraint_junction.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `attribute_foreign_constraint_junction` table; the two columns
/// together form the (non-auto-incrementing) primary key.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "attribute_foreign_constraint_junction")]
pub struct Model {
    #[sea_orm(primary_key, auto_increment = false)]
    pub attribute_id: i32,
    #[sea_orm(primary_key, auto_increment = false)]
    pub constraint_id: i32,
}

/// Foreign-key relations of the junction; both cascade on update and delete.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::attribute::Entity",
        from = "Column::AttributeId",
        to = "super::attribute::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    Attribute,
    #[sea_orm(
        belongs_to = "super::constraint_metadata::Entity",
        from = "Column::ConstraintId",
        to = "super::constraint_metadata::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    ConstraintMetadata,
}

impl Related<super::attribute::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::Attribute.def()
    }
}

impl Related<super::constraint_metadata::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::ConstraintMetadata.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/logical_expression.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `logical_expression` table.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "logical_expression")]
pub struct Model {
    #[sea_orm(primary_key)]
    pub id: i32,
    /// References `cascades_group.id` (see [`Relation::CascadesGroup`]).
    pub group_id: i32,
    pub fingerprint: i64,
    pub variant_tag: i16,
    pub data: Json,
}

/// Relations of `logical_expression`: it belongs to one group and has many
/// `logical_children` rows.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::cascades_group::Entity",
        from = "Column::GroupId",
        to = "super::cascades_group::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    CascadesGroup,
    #[sea_orm(has_many = "super::logical_children::Entity")]
    LogicalChildren,
}

impl Related<super::logical_children::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::LogicalChildren.def()
    }
}

// This path to `cascades_group` goes through the `logical_children` junction
// (expression -> logical_children -> group); it does not use
// `Relation::CascadesGroup`.
impl Related<super::cascades_group::Entity> for Entity {
    fn to() -> RelationDef {
        super::logical_children::Relation::CascadesGroup.def()
    }
    fn via() -> Option<RelationDef> {
        Some(
            super::logical_children::Relation::LogicalExpression
                .def()
                .rev(),
        )
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/predicate_logical_expression_junction.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "predicate_logical_expression_junction")] 7 | pub struct Model { 8 | #[sea_orm(primary_key, auto_increment = false)] 9 | pub logical_expr_id: i32, 10 | #[sea_orm(primary_key, auto_increment = false)] 11 | pub predicate_id: i32, 12 | } 13 | 14 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 15 | pub enum Relation { 16 | #[sea_orm( 17 | belongs_to = "super::logical_expression::Entity", 18 | from = "Column::LogicalExprId", 19 | to = "super::logical_expression::Column::Id", 20 | on_update = "Cascade", 21 | on_delete = "Cascade" 22 | )] 23 | LogicalExpression, 24 | #[sea_orm( 25 | belongs_to = "super::predicate::Entity", 26 | from = "Column::PredicateId", 27 | to = "super::predicate::Column::Id", 28 | on_update = "Cascade", 29 | on_delete = "Cascade" 30 | )] 31 | Predicate, 32 | } 33 | 34 | impl Related for Entity { 35 | fn to() -> RelationDef { 36 | Relation::LogicalExpression.def() 37 | } 38 | } 39 | 40 | impl Related for Entity { 41 | fn to() -> RelationDef { 42 | Relation::Predicate.def() 43 | } 44 | } 45 | 46 | impl ActiveModelBehavior for ActiveModel {} 47 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/cast_pred.rs: -------------------------------------------------------------------------------- 1 | use arrow_schema::DataType; 2 | 3 | use crate::common::nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}; 4 | 5 | use super::data_type_pred::DataTypePred; 6 | 7 | /// [`CastPred`] casts a column from one data type to another. 8 | /// 9 | /// A [`CastPred`] has two children: 10 | /// 1. The original data to cast 11 | /// 2. 
The target data type to cast to
#[derive(Clone, Debug)]
pub struct CastPred(pub ArcPredicateNode);

impl CastPred {
    /// Builds a cast of `child` to `cast_to`; the target type is stored as a
    /// [`DataTypePred`] in child position 1.
    pub fn new(child: ArcPredicateNode, cast_to: DataType) -> Self {
        CastPred(
            PredicateNode {
                typ: PredicateType::Cast,
                children: vec![child, DataTypePred::new(cast_to).into_pred_node()],
                data: None,
            }
            .into(),
        )
    }

    /// Returns the expression being cast (child 0).
    pub fn child(&self) -> ArcPredicateNode {
        self.0.child(0)
    }

    /// Returns the target data type (read from child 1).
    ///
    /// # Panics
    ///
    /// Panics if child 1 cannot be converted into a [`DataTypePred`].
    pub fn cast_to(&self) -> DataType {
        DataTypePred::from_pred_node(self.0.child(1))
            .unwrap()
            .data_type()
    }
}

impl ReprPredicateNode for CastPred {
    fn into_pred_node(self) -> ArcPredicateNode {
        self.0
    }

    /// Returns `None` unless `pred_node` has type [`PredicateType::Cast`].
    fn from_pred_node(pred_node: ArcPredicateNode) -> Option<Self> {
        if !matches!(pred_node.typ, PredicateType::Cast) {
            return None;
        }
        Some(Self(pred_node))
    }
}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/predicate_physical_expression_junction.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.1

use sea_orm::entity::prelude::*;

/// A row of the `predicate_physical_expression_junction` table; the two columns
/// together form the (non-auto-incrementing) primary key.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "predicate_physical_expression_junction")]
pub struct Model {
    #[sea_orm(primary_key, auto_increment = false)]
    pub physical_expr_id: i32,
    #[sea_orm(primary_key, auto_increment = false)]
    pub predicate_id: i32,
}

/// Foreign-key relations of the junction; both cascade on update and delete.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::physical_expression::Entity",
        from = "Column::PhysicalExprId",
        to = "super::physical_expression::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    PhysicalExpression,
    #[sea_orm(
        belongs_to = "super::predicate::Entity",
        from = "Column::PredicateId",
        to = "super::predicate::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    Predicate,
}

impl Related<super::physical_expression::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::PhysicalExpression.def()
    }
}

impl Related<super::predicate::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::Predicate.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/physical_expression_to_statistic_junction.rs:
--------------------------------------------------------------------------------
//!
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "physical_expression_to_statistic_junction")] 7 | pub struct Model { 8 | #[sea_orm(primary_key, auto_increment = false)] 9 | pub physical_expression_id: i32, 10 | #[sea_orm(primary_key, auto_increment = false)] 11 | pub statistic_id: i32, 12 | } 13 | 14 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 15 | pub enum Relation { 16 | #[sea_orm( 17 | belongs_to = "super::physical_expression::Entity", 18 | from = "Column::PhysicalExpressionId", 19 | to = "super::physical_expression::Column::Id", 20 | on_update = "Cascade", 21 | on_delete = "Cascade" 22 | )] 23 | PhysicalExpression, 24 | #[sea_orm( 25 | belongs_to = "super::statistic::Entity", 26 | from = "Column::StatisticId", 27 | to = "super::statistic::Column::Id", 28 | on_update = "Cascade", 29 | on_delete = "Cascade" 30 | )] 31 | Statistic, 32 | } 33 | 34 | impl Related for Entity { 35 | fn to() -> RelationDef { 36 | Relation::PhysicalExpression.def() 37 | } 38 | } 39 | 40 | impl Related for Entity { 41 | fn to() -> RelationDef { 42 | Relation::Statistic.def() 43 | } 44 | } 45 | 46 | impl ActiveModelBehavior for ActiveModel {} 47 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/un_op_pred.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use crate::common::nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}; 4 | 5 | /// TODO: documentation 6 | #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 7 | pub enum UnOpType { 8 | Neg = 1, 9 | Not, 10 | } 11 | 12 | impl Display for UnOpType { 13 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 14 | write!(f, "{:?}", self) 15 | } 16 | } 17 | 18 | #[derive(Clone, Debug)] 19 | pub struct UnOpPred(pub 
ArcPredicateNode); 20 | 21 | impl UnOpPred { 22 | pub fn new(child: ArcPredicateNode, op_type: UnOpType) -> Self { 23 | UnOpPred( 24 | PredicateNode { 25 | typ: PredicateType::UnOp(op_type), 26 | children: vec![child], 27 | data: None, 28 | } 29 | .into(), 30 | ) 31 | } 32 | 33 | pub fn child(&self) -> ArcPredicateNode { 34 | self.0.child(0) 35 | } 36 | 37 | pub fn op_type(&self) -> UnOpType { 38 | if let PredicateType::UnOp(op_type) = self.0.typ { 39 | op_type 40 | } else { 41 | panic!("not a un op") 42 | } 43 | } 44 | } 45 | 46 | impl ReprPredicateNode for UnOpPred { 47 | fn into_pred_node(self) -> ArcPredicateNode { 48 | self.0 49 | } 50 | 51 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 52 | if !matches!(pred_node.typ, PredicateType::UnOp(_)) { 53 | return None; 54 | } 55 | Some(Self(pred_node)) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/prelude.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | pub use super::attribute::Entity as Attribute; 4 | pub use super::attribute_constraint_junction::Entity as AttributeConstraintJunction; 5 | pub use super::attribute_foreign_constraint_junction::Entity as AttributeForeignConstraintJunction; 6 | pub use super::cascades_group::Entity as CascadesGroup; 7 | pub use super::constraint_metadata::Entity as ConstraintMetadata; 8 | pub use super::database_metadata::Entity as DatabaseMetadata; 9 | pub use super::event::Entity as Event; 10 | pub use super::group_winner::Entity as GroupWinner; 11 | pub use super::index_metadata::Entity as IndexMetadata; 12 | pub use super::logical_children::Entity as LogicalChildren; 13 | pub use super::logical_expression::Entity as LogicalExpression; 14 | pub use super::logical_property::Entity as LogicalProperty; 15 | pub use super::namespace_metadata::Entity as NamespaceMetadata; 16 | pub use super::physical_children::Entity as PhysicalChildren; 17 | pub use super::physical_expression::Entity as PhysicalExpression; 18 | pub use super::physical_expression_to_statistic_junction::Entity as PhysicalExpressionToStatisticJunction; 19 | pub use super::physical_property::Entity as PhysicalProperty; 20 | pub use super::plan_cost::Entity as PlanCost; 21 | pub use super::statistic::Entity as Statistic; 22 | pub use super::statistic_to_attribute_junction::Entity as StatisticToAttributeJunction; 23 | pub use super::table_metadata::Entity as TableMetadata; 24 | pub use super::trigger::Entity as Trigger; 25 | pub use super::versioned_statistic::Entity as VersionedStatistic; 26 | -------------------------------------------------------------------------------- /optd-cost-model/src/cost/join/nested_loop_join.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | common::{ 3 | nodes::{ArcPredicateNode, JoinType}, 4 | types::GroupId, 5 | }, 6 | cost_model::CostModelImpl, 7 | 
storage::CostModelStorageManager, 8 | CostModelResult, EstimatedStatistic, 9 | }; 10 | 11 | use super::get_input_correlation; 12 | 13 | impl CostModelImpl { 14 | #[allow(clippy::too_many_arguments)] 15 | pub async fn get_nlj_row_cnt( 16 | &self, 17 | join_typ: JoinType, 18 | group_id: GroupId, 19 | left_row_cnt: EstimatedStatistic, 20 | right_row_cnt: EstimatedStatistic, 21 | left_group_id: GroupId, 22 | right_group_id: GroupId, 23 | join_cond: ArcPredicateNode, 24 | ) -> CostModelResult { 25 | let selectivity = { 26 | let output_attr_refs = self.memo.get_attribute_refs(group_id); 27 | let left_attr_refs = self.memo.get_attribute_refs(left_group_id); 28 | let right_attr_refs = self.memo.get_attribute_refs(right_group_id); 29 | let input_correlation = get_input_correlation(left_attr_refs, right_attr_refs); 30 | 31 | self.get_join_selectivity_from_expr_tree( 32 | join_typ, 33 | group_id, 34 | join_cond, 35 | output_attr_refs.attr_refs(), 36 | input_correlation, 37 | left_row_cnt.0, 38 | right_row_cnt.0, 39 | ) 40 | .await? 
41 | }; 42 | Ok(EstimatedStatistic( 43 | (left_row_cnt.0 * right_row_cnt.0 * selectivity).max(1.0), 44 | )) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/m20241029_000001_table_metadata.rs: -------------------------------------------------------------------------------- 1 | use crate::migrator::catalog::namespace_metadata::NamespaceMetadata; 2 | use sea_orm_migration::{prelude::*, schema::*}; 3 | 4 | #[derive(Iden)] 5 | pub enum TableMetadata { 6 | Table, 7 | Id, 8 | Name, 9 | NamespaceId, 10 | CreationTime, 11 | } 12 | 13 | #[derive(DeriveMigrationName)] 14 | pub struct Migration; 15 | 16 | #[async_trait::async_trait] 17 | impl MigrationTrait for Migration { 18 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 19 | manager 20 | .create_table( 21 | Table::create() 22 | .table(TableMetadata::Table) 23 | .if_not_exists() 24 | .col(pk_auto(TableMetadata::Id)) 25 | .col(string(TableMetadata::Name)) 26 | .col(integer(TableMetadata::NamespaceId)) 27 | .foreign_key( 28 | ForeignKey::create() 29 | .from(TableMetadata::Table, TableMetadata::NamespaceId) 30 | .to(NamespaceMetadata::Table, NamespaceMetadata::Id) 31 | .on_delete(ForeignKeyAction::Cascade) 32 | .on_update(ForeignKeyAction::Cascade), 33 | ) 34 | .col(timestamp(TableMetadata::CreationTime)) 35 | .to_owned(), 36 | ) 37 | .await 38 | } 39 | 40 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 41 | manager 42 | .drop_table(Table::drop().table(TableMetadata::Table).to_owned()) 43 | .await 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/m20241029_000001_namespace_metadata.rs: -------------------------------------------------------------------------------- 1 | use crate::migrator::catalog::database_metadata::DatabaseMetadata; 2 | use sea_orm_migration::{prelude::*, schema::*}; 3 | 4 | #[derive(Iden)] 
5 | pub enum NamespaceMetadata { 6 | Table, 7 | Id, 8 | Name, 9 | DatabaseId, 10 | CreationTime, 11 | } 12 | 13 | #[derive(DeriveMigrationName)] 14 | pub struct Migration; 15 | 16 | #[async_trait::async_trait] 17 | impl MigrationTrait for Migration { 18 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 19 | manager 20 | .create_table( 21 | Table::create() 22 | .table(NamespaceMetadata::Table) 23 | .if_not_exists() 24 | .col(pk_auto(NamespaceMetadata::Id)) 25 | .col(string(NamespaceMetadata::Name)) 26 | .col(integer(NamespaceMetadata::DatabaseId)) 27 | .foreign_key( 28 | ForeignKey::create() 29 | .from(NamespaceMetadata::Table, NamespaceMetadata::DatabaseId) 30 | .to(DatabaseMetadata::Table, DatabaseMetadata::Id) 31 | .on_delete(ForeignKeyAction::Cascade) 32 | .on_update(ForeignKeyAction::Cascade), 33 | ) 34 | .col(timestamp(NamespaceMetadata::CreationTime)) 35 | .to_owned(), 36 | ) 37 | .await 38 | } 39 | 40 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 41 | manager 42 | .drop_table(Table::drop().table(NamespaceMetadata::Table).to_owned()) 43 | .await 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/plan_cost.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel)] 6 | #[sea_orm(table_name = "plan_cost")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub physical_expression_id: i32, 11 | pub epoch_id: i32, 12 | pub cost: Option, 13 | #[sea_orm(column_type = "Float", nullable)] 14 | pub estimated_statistic: Option, 15 | pub is_valid: bool, 16 | } 17 | 18 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 19 | pub enum Relation { 20 | #[sea_orm( 21 | belongs_to = "super::event::Entity", 22 | from = "Column::EpochId", 23 | to = "super::event::Column::EpochId", 24 | on_update = "Cascade", 25 | on_delete = "Cascade" 26 | )] 27 | Event, 28 | #[sea_orm(has_many = "super::group_winner::Entity")] 29 | GroupWinner, 30 | #[sea_orm( 31 | belongs_to = "super::physical_expression::Entity", 32 | from = "Column::PhysicalExpressionId", 33 | to = "super::physical_expression::Column::Id", 34 | on_update = "Cascade", 35 | on_delete = "Cascade" 36 | )] 37 | PhysicalExpression, 38 | } 39 | 40 | impl Related for Entity { 41 | fn to() -> RelationDef { 42 | Relation::Event.def() 43 | } 44 | } 45 | 46 | impl Related for Entity { 47 | fn to() -> RelationDef { 48 | Relation::GroupWinner.def() 49 | } 50 | } 51 | 52 | impl Related for Entity { 53 | fn to() -> RelationDef { 54 | Relation::PhysicalExpression.def() 55 | } 56 | } 57 | 58 | impl ActiveModelBehavior for ActiveModel {} 59 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/mod.rs: -------------------------------------------------------------------------------- 1 | //! Entities related to the memo table used for dynamic programming in the Cascades query 2 | //! optimization framework. 
3 | 4 | pub(crate) mod m20241029_000001_cascades_group; 5 | pub(crate) mod m20241029_000001_group_winner; 6 | pub(crate) mod m20241029_000001_logical_children; 7 | pub(crate) mod m20241029_000001_logical_expression; 8 | pub(crate) mod m20241029_000001_logical_property; 9 | pub(crate) mod m20241029_000001_physical_children; 10 | pub(crate) mod m20241029_000001_physical_expression; 11 | pub(crate) mod m20241029_000001_physical_property; 12 | pub(crate) mod m20241029_000001_predicate; 13 | pub(crate) mod m20241029_000001_predicate_children; 14 | pub(crate) mod m20241029_000001_predicate_logical_expression_junction; 15 | pub(crate) mod m20241029_000001_predicate_physical_expression_junction; 16 | 17 | pub(crate) use m20241029_000001_cascades_group as cascades_group; 18 | pub(crate) use m20241029_000001_group_winner as group_winner; 19 | pub(crate) use m20241029_000001_logical_children as logical_children; 20 | pub(crate) use m20241029_000001_logical_expression as logical_expression; 21 | pub(crate) use m20241029_000001_logical_property as logical_property; 22 | pub(crate) use m20241029_000001_physical_children as physical_children; 23 | pub(crate) use m20241029_000001_physical_expression as physical_expression; 24 | pub(crate) use m20241029_000001_physical_property as physical_property; 25 | pub(crate) use m20241029_000001_predicate as predicate; 26 | pub(crate) use m20241029_000001_predicate_children as predicate_children; 27 | pub(crate) use m20241029_000001_predicate_logical_expression_junction as predicate_logical_expression_junction; 28 | pub(crate) use m20241029_000001_predicate_physical_expression_junction as predicate_physical_expression_junction; 29 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/m20241029_000001_attribute.rs: -------------------------------------------------------------------------------- 1 | use crate::migrator::catalog::table_metadata::TableMetadata; 2 | use 
sea_orm_migration::{prelude::*, schema::*}; 3 | 4 | #[derive(Iden)] 5 | pub enum Attribute { 6 | Table, 7 | Id, 8 | TableId, 9 | Name, 10 | CompressionMethod, 11 | VariantTag, 12 | BaseAttributeNumber, 13 | IsNotNull, 14 | } 15 | 16 | #[derive(DeriveMigrationName)] 17 | pub struct Migration; 18 | 19 | #[async_trait::async_trait] 20 | impl MigrationTrait for Migration { 21 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 22 | manager 23 | .create_table( 24 | Table::create() 25 | .table(Attribute::Table) 26 | .if_not_exists() 27 | .col(pk_auto(Attribute::Id)) 28 | .col(integer(Attribute::TableId)) 29 | .foreign_key( 30 | ForeignKey::create() 31 | .from(Attribute::Table, Attribute::TableId) 32 | .to(TableMetadata::Table, TableMetadata::Id) 33 | .on_delete(ForeignKeyAction::Cascade) 34 | .on_update(ForeignKeyAction::Cascade), 35 | ) 36 | .col(string(Attribute::Name)) 37 | .col(char(Attribute::CompressionMethod)) 38 | .col(integer(Attribute::VariantTag)) 39 | .col(integer(Attribute::BaseAttributeNumber)) 40 | .col(boolean(Attribute::IsNotNull)) 41 | .to_owned(), 42 | ) 43 | .await 44 | } 45 | 46 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 47 | manager 48 | .drop_table(Table::drop().table(Attribute::Table).to_owned()) 49 | .await 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_logical_property.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing a logical property of a Cascades group. 2 | //! 3 | //! TODO what exactly are we storing in here? 4 | //! TODO why is it linked to only cascades groups and not logical expressions? 
5 | 6 | use crate::migrator::memo::cascades_group::CascadesGroup; 7 | use sea_orm_migration::{prelude::*, schema::*}; 8 | 9 | #[derive(DeriveIden)] 10 | pub enum LogicalProperty { 11 | Table, 12 | Id, 13 | GroupId, 14 | VariantTag, 15 | Data, 16 | } 17 | 18 | #[derive(DeriveMigrationName)] 19 | pub struct Migration; 20 | 21 | #[async_trait::async_trait] 22 | impl MigrationTrait for Migration { 23 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 24 | manager 25 | .create_table( 26 | Table::create() 27 | .table(LogicalProperty::Table) 28 | .if_not_exists() 29 | .col(pk_auto(LogicalProperty::Id)) 30 | .col(integer(LogicalProperty::GroupId)) 31 | .foreign_key( 32 | ForeignKey::create() 33 | .from(LogicalProperty::Table, LogicalProperty::GroupId) 34 | .to(CascadesGroup::Table, CascadesGroup::Id) 35 | .on_delete(ForeignKeyAction::Cascade) 36 | .on_update(ForeignKeyAction::Cascade), 37 | ) 38 | .col(small_integer(LogicalProperty::VariantTag)) 39 | .col(json(LogicalProperty::Data)) 40 | .to_owned(), 41 | ) 42 | .await 43 | } 44 | 45 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 46 | manager 47 | .drop_table(Table::drop().table(LogicalProperty::Table).to_owned()) 48 | .await 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/table_metadata.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "table_metadata")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub name: String, 11 | pub namespace_id: i32, 12 | pub creation_time: DateTimeUtc, 13 | } 14 | 15 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 16 | pub enum Relation { 17 | #[sea_orm(has_many = "super::attribute::Entity")] 18 | Attribute, 19 | #[sea_orm(has_many = "super::index_metadata::Entity")] 20 | IndexMetadata, 21 | #[sea_orm( 22 | belongs_to = "super::namespace_metadata::Entity", 23 | from = "Column::NamespaceId", 24 | to = "super::namespace_metadata::Column::Id", 25 | on_update = "Cascade", 26 | on_delete = "Cascade" 27 | )] 28 | NamespaceMetadata, 29 | #[sea_orm(has_many = "super::statistic::Entity")] 30 | Statistic, 31 | #[sea_orm(has_many = "super::trigger::Entity")] 32 | Trigger, 33 | } 34 | 35 | impl Related for Entity { 36 | fn to() -> RelationDef { 37 | Relation::Attribute.def() 38 | } 39 | } 40 | 41 | impl Related for Entity { 42 | fn to() -> RelationDef { 43 | Relation::IndexMetadata.def() 44 | } 45 | } 46 | 47 | impl Related for Entity { 48 | fn to() -> RelationDef { 49 | Relation::NamespaceMetadata.def() 50 | } 51 | } 52 | 53 | impl Related for Entity { 54 | fn to() -> RelationDef { 55 | Relation::Statistic.def() 56 | } 57 | } 58 | 59 | impl Related for Entity { 60 | fn to() -> RelationDef { 61 | Relation::Trigger.def() 62 | } 63 | } 64 | 65 | impl ActiveModelBehavior for ActiveModel {} 66 | -------------------------------------------------------------------------------- /optd-cost-model/src/cost/join/hash_join.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | common::{nodes::JoinType, predicates::list_pred::ListPred, types::GroupId}, 3 | cost_model::CostModelImpl, 4 | storage::CostModelStorageManager, 5 
| CostModelResult, EstimatedStatistic, 6 | }; 7 | 8 | use super::get_input_correlation; 9 | 10 | impl CostModelImpl { 11 | #[allow(clippy::too_many_arguments)] 12 | pub async fn get_hash_join_row_cnt( 13 | &self, 14 | join_typ: JoinType, 15 | group_id: GroupId, 16 | left_row_cnt: EstimatedStatistic, 17 | right_row_cnt: EstimatedStatistic, 18 | left_group_id: GroupId, 19 | right_group_id: GroupId, 20 | left_keys: ListPred, 21 | right_keys: ListPred, 22 | ) -> CostModelResult { 23 | let selectivity = { 24 | let output_attr_refs = self.memo.get_attribute_refs(group_id); 25 | let left_attr_refs = self.memo.get_attribute_refs(left_group_id); 26 | let right_attr_refs = self.memo.get_attribute_refs(right_group_id); 27 | let left_attr_cnt = left_attr_refs.attr_refs().len(); 28 | // there may be more than one expression tree in a group. 29 | // see comment in PredicateType::PhysicalFilter(_) for more information 30 | let input_correlation = get_input_correlation(left_attr_refs, right_attr_refs); 31 | self.get_join_selectivity_from_keys( 32 | join_typ, 33 | group_id, 34 | left_keys, 35 | right_keys, 36 | output_attr_refs.attr_refs(), 37 | input_correlation, 38 | left_row_cnt.0, 39 | right_row_cnt.0, 40 | left_attr_cnt, 41 | ) 42 | .await? 43 | }; 44 | Ok(EstimatedStatistic( 45 | (left_row_cnt.0 * right_row_cnt.0 * selectivity).max(1.0), 46 | )) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/predicate.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.1 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "predicate")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub data: Json, 11 | pub variant: i32, 12 | } 13 | 14 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 15 | pub enum Relation { 16 | #[sea_orm(has_many = "super::predicate_logical_expression_junction::Entity")] 17 | PredicateLogicalExpressionJunction, 18 | #[sea_orm(has_many = "super::predicate_physical_expression_junction::Entity")] 19 | PredicatePhysicalExpressionJunction, 20 | } 21 | 22 | impl Related for Entity { 23 | fn to() -> RelationDef { 24 | Relation::PredicateLogicalExpressionJunction.def() 25 | } 26 | } 27 | 28 | impl Related for Entity { 29 | fn to() -> RelationDef { 30 | Relation::PredicatePhysicalExpressionJunction.def() 31 | } 32 | } 33 | 34 | impl Related for Entity { 35 | fn to() -> RelationDef { 36 | super::predicate_logical_expression_junction::Relation::LogicalExpression.def() 37 | } 38 | fn via() -> Option { 39 | Some( 40 | super::predicate_logical_expression_junction::Relation::Predicate 41 | .def() 42 | .rev(), 43 | ) 44 | } 45 | } 46 | 47 | impl Related for Entity { 48 | fn to() -> RelationDef { 49 | super::predicate_physical_expression_junction::Relation::PhysicalExpression.def() 50 | } 51 | fn via() -> Option { 52 | Some( 53 | super::predicate_physical_expression_junction::Relation::Predicate 54 | .def() 55 | .rev(), 56 | ) 57 | } 58 | } 59 | 60 | impl ActiveModelBehavior for ActiveModel {} 61 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/like_pred.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::common::{ 4 | nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}, 5 | values::Value, 6 | 
}; 7 | 8 | #[derive(Clone, Debug)] 9 | pub struct LikePred(pub ArcPredicateNode); 10 | 11 | impl LikePred { 12 | pub fn new( 13 | negated: bool, 14 | case_insensitive: bool, 15 | child: ArcPredicateNode, 16 | pattern: ArcPredicateNode, 17 | ) -> Self { 18 | // TODO: support multiple values in data. 19 | let negated = if negated { 1 } else { 0 }; 20 | let case_insensitive = if case_insensitive { 1 } else { 0 }; 21 | LikePred( 22 | PredicateNode { 23 | typ: PredicateType::Like, 24 | children: vec![child.into_pred_node(), pattern.into_pred_node()], 25 | data: Some(Value::Serialized(Arc::new([negated, case_insensitive]))), 26 | } 27 | .into(), 28 | ) 29 | } 30 | 31 | pub fn child(&self) -> ArcPredicateNode { 32 | self.0.child(0) 33 | } 34 | 35 | pub fn pattern(&self) -> ArcPredicateNode { 36 | self.0.child(1) 37 | } 38 | 39 | /// `true` for `NOT LIKE`. 40 | pub fn negated(&self) -> bool { 41 | match self.0.data.as_ref().unwrap() { 42 | Value::Serialized(data) => data[0] != 0, 43 | _ => panic!("not a serialized value"), 44 | } 45 | } 46 | 47 | pub fn case_insensitive(&self) -> bool { 48 | match self.0.data.as_ref().unwrap() { 49 | Value::Serialized(data) => data[1] != 0, 50 | _ => panic!("not a serialized value"), 51 | } 52 | } 53 | } 54 | 55 | impl ReprPredicateNode for LikePred { 56 | fn into_pred_node(self) -> ArcPredicateNode { 57 | self.0 58 | } 59 | 60 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 61 | if !matches!(pred_node.typ, PredicateType::Like) { 62 | return None; 63 | } 64 | Some(Self(pred_node)) 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/m20241029_000001_trigger.rs: -------------------------------------------------------------------------------- 1 | use crate::migrator::catalog::table_metadata::TableMetadata; 2 | use sea_orm_migration::{prelude::*, schema::*}; 3 | 4 | #[derive(Iden)] 5 | pub enum Trigger { 6 | Table, 7 | Id, 8 | Name, 9 | TableId, 
10 | ParentTriggerId, 11 | Function, 12 | } 13 | 14 | #[derive(DeriveMigrationName)] 15 | pub struct Migration; 16 | 17 | #[async_trait::async_trait] 18 | impl MigrationTrait for Migration { 19 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 20 | manager 21 | .create_table( 22 | Table::create() 23 | .table(Trigger::Table) 24 | .if_not_exists() 25 | .col(pk_auto(Trigger::Id)) 26 | .col(string(Trigger::Name)) 27 | .col(integer(Trigger::TableId)) 28 | .foreign_key( 29 | ForeignKey::create() 30 | .from(Trigger::Table, Trigger::TableId) 31 | .to(TableMetadata::Table, TableMetadata::Id) 32 | .on_delete(ForeignKeyAction::Cascade) 33 | .on_update(ForeignKeyAction::Cascade), 34 | ) 35 | .col(integer(Trigger::ParentTriggerId)) 36 | .foreign_key( 37 | ForeignKey::create() 38 | .from(Trigger::Table, Trigger::ParentTriggerId) 39 | .to(Trigger::Table, Trigger::Id) 40 | .on_delete(ForeignKeyAction::Cascade) 41 | .on_update(ForeignKeyAction::Cascade), 42 | ) 43 | .col(json(Trigger::Function)) 44 | .to_owned(), 45 | ) 46 | .await 47 | } 48 | 49 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 50 | manager 51 | .drop_table(Table::drop().table(Trigger::Table).to_owned()) 52 | .await 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/mod.rs: -------------------------------------------------------------------------------- 1 | use sea_orm_migration::prelude::*; 2 | 3 | mod catalog; 4 | mod cost_model; 5 | mod memo; 6 | 7 | pub struct Migrator; 8 | 9 | #[async_trait::async_trait] 10 | impl MigratorTrait for Migrator { 11 | fn migrations() -> Vec> { 12 | vec![ 13 | Box::new(catalog::database_metadata::Migration), 14 | Box::new(catalog::namespace_metadata::Migration), 15 | Box::new(catalog::table_metadata::Migration), 16 | Box::new(catalog::attribute::Migration), 17 | Box::new(catalog::attribute_constraint_junction::Migration), 18 | 
Box::new(catalog::attribute_foreign_constraint_junction::Migration), 19 | Box::new(catalog::index_metadata::Migration), 20 | Box::new(catalog::trigger::Migration), 21 | Box::new(catalog::constraint_metadata::Migration), 22 | Box::new(cost_model::statistic::Migration), 23 | Box::new(cost_model::versioned_statistic::Migration), 24 | Box::new(cost_model::statistic_to_attribute_junction::Migration), 25 | Box::new(cost_model::physical_expression_to_statistic_junction::Migration), 26 | Box::new(cost_model::event::Migration), 27 | Box::new(cost_model::plan_cost::Migration), 28 | Box::new(memo::cascades_group::Migration), 29 | Box::new(memo::group_winner::Migration), 30 | Box::new(memo::logical_expression::Migration), 31 | Box::new(memo::logical_children::Migration), 32 | Box::new(memo::logical_property::Migration), 33 | Box::new(memo::physical_expression::Migration), 34 | Box::new(memo::physical_children::Migration), 35 | Box::new(memo::physical_property::Migration), 36 | Box::new(memo::predicate::Migration), 37 | Box::new(memo::predicate_children::Migration), 38 | Box::new(memo::predicate_logical_expression_junction::Migration), 39 | Box::new(memo::predicate_physical_expression_junction::Migration), 40 | ] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_physical_property.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing a physical property of a physical expression in the Cascades framework. 2 | //! 3 | //! TODO what exactly are we storing in here? 4 | //! TODO why is it linked to only physical expressions and not cascades groups? 
5 | 6 | use crate::migrator::memo::physical_expression::PhysicalExpression; 7 | use sea_orm_migration::{prelude::*, schema::*}; 8 | 9 | #[derive(DeriveIden)] 10 | pub enum PhysicalProperty { 11 | Table, 12 | Id, 13 | PhysicalExpressionId, 14 | VariantTag, 15 | Data, 16 | } 17 | 18 | #[derive(DeriveMigrationName)] 19 | pub struct Migration; 20 | 21 | #[async_trait::async_trait] 22 | impl MigrationTrait for Migration { 23 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 24 | manager 25 | .create_table( 26 | Table::create() 27 | .table(PhysicalProperty::Table) 28 | .if_not_exists() 29 | .col(pk_auto(PhysicalProperty::Id)) 30 | .col(integer(PhysicalProperty::PhysicalExpressionId)) 31 | .foreign_key( 32 | ForeignKey::create() 33 | .from( 34 | PhysicalProperty::Table, 35 | PhysicalProperty::PhysicalExpressionId, 36 | ) 37 | .to(PhysicalExpression::Table, PhysicalExpression::Id) 38 | .on_delete(ForeignKeyAction::Cascade) 39 | .on_update(ForeignKeyAction::Cascade), 40 | ) 41 | .col(small_integer(PhysicalProperty::VariantTag)) 42 | .col(json(PhysicalProperty::Data)) 43 | .to_owned(), 44 | ) 45 | .await 46 | } 47 | 48 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 49 | manager 50 | .drop_table(Table::drop().table(PhysicalProperty::Table).to_owned()) 51 | .await 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/group_winner.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "group_winner")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub group_id: i32, 11 | pub physical_expression_id: i32, 12 | pub cost_id: i32, 13 | pub epoch_id: i32, 14 | } 15 | 16 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 17 | pub enum Relation { 18 | #[sea_orm( 19 | belongs_to = "super::cascades_group::Entity", 20 | from = "Column::GroupId", 21 | to = "super::cascades_group::Column::Id", 22 | on_update = "Cascade", 23 | on_delete = "Cascade" 24 | )] 25 | CascadesGroup, 26 | #[sea_orm( 27 | belongs_to = "super::event::Entity", 28 | from = "Column::EpochId", 29 | to = "super::event::Column::EpochId", 30 | on_update = "Cascade", 31 | on_delete = "Cascade" 32 | )] 33 | Event, 34 | #[sea_orm( 35 | belongs_to = "super::physical_expression::Entity", 36 | from = "Column::PhysicalExpressionId", 37 | to = "super::physical_expression::Column::Id", 38 | on_update = "Cascade", 39 | on_delete = "Cascade" 40 | )] 41 | PhysicalExpression, 42 | #[sea_orm( 43 | belongs_to = "super::plan_cost::Entity", 44 | from = "Column::CostId", 45 | to = "super::plan_cost::Column::Id", 46 | on_update = "Cascade", 47 | on_delete = "Cascade" 48 | )] 49 | PlanCost, 50 | } 51 | 52 | impl Related for Entity { 53 | fn to() -> RelationDef { 54 | Relation::CascadesGroup.def() 55 | } 56 | } 57 | 58 | impl Related for Entity { 59 | fn to() -> RelationDef { 60 | Relation::Event.def() 61 | } 62 | } 63 | 64 | impl Related for Entity { 65 | fn to() -> RelationDef { 66 | Relation::PhysicalExpression.def() 67 | } 68 | } 69 | 70 | impl Related for Entity { 71 | fn to() -> RelationDef { 72 | Relation::PlanCost.def() 73 | } 74 | } 75 | 76 | impl ActiveModelBehavior for ActiveModel {} 77 | -------------------------------------------------------------------------------- 
/optd-persistent/src/migrator/catalog/m20241029_000001_index_metadata.rs:
--------------------------------------------------------------------------------
use crate::migrator::catalog::table_metadata::TableMetadata;
use sea_orm_migration::{prelude::*, schema::*};

/// Identifiers of the `index_metadata` table and its columns.
#[derive(Iden)]
pub enum IndexMetadata {
    Table,
    Id,
    TableId,
    Name,
    NumberOfAttributes,
    VariantTag,
    IsUnique,
    NullsNotDistinct,
    IsPrimary,
    IsClustered,
    IsExclusion,
    Description,
}

#[derive(DeriveMigrationName)]
pub struct Migration;

#[async_trait::async_trait]
impl MigrationTrait for Migration {
    /// Creates the `index_metadata` table (if it does not exist), linking `table_id` to
    /// `table_metadata.id` with cascading delete and update.
    async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
        // index_metadata.table_id -> table_metadata.id
        let mut indexed_table_fk = ForeignKey::create();
        indexed_table_fk
            .from(IndexMetadata::Table, IndexMetadata::TableId)
            .to(TableMetadata::Table, TableMetadata::Id)
            .on_delete(ForeignKeyAction::Cascade)
            .on_update(ForeignKeyAction::Cascade);

        let create_stmt = Table::create()
            .table(IndexMetadata::Table)
            .if_not_exists()
            .col(pk_auto(IndexMetadata::Id))
            .col(integer(IndexMetadata::TableId))
            .foreign_key(&mut indexed_table_fk)
            .col(string(IndexMetadata::Name))
            .col(integer(IndexMetadata::NumberOfAttributes))
            .col(integer(IndexMetadata::VariantTag))
            .col(boolean(IndexMetadata::IsUnique))
            .col(boolean(IndexMetadata::NullsNotDistinct))
            .col(boolean(IndexMetadata::IsPrimary))
            .col(boolean(IndexMetadata::IsClustered))
            .col(boolean(IndexMetadata::IsExclusion))
            .col(string(IndexMetadata::Description))
            .to_owned();

        manager.create_table(create_stmt).await
    }

    /// Drops the `index_metadata` table.
    async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
        let drop_stmt = Table::drop().table(IndexMetadata::Table).to_owned();
        manager.drop_table(drop_stmt).await
    }
}
--------------------------------------------------------------------------------
/optd-persistent/src/entities/constraint_metadata.rs:
--------------------------------------------------------------------------------
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// A row of the `constraint_metadata` table.
// NOTE(review): the type arguments of the three `Option` fields were lost in extraction.
// `i32` is restored because each column is the `from` side of a relation to an `Id` column
// — confirm against the `constraint_metadata` migration.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "constraint_metadata")]
pub struct Model {
    #[sea_orm(primary_key)]
    pub id: i32,
    pub name: String,
    pub variant_tag: i32,
    pub table_id: Option<i32>,
    pub index_id: Option<i32>,
    pub foreign_ref_id: Option<i32>,
    pub check_src: String,
}

/// Relations of `constraint_metadata`.
///
/// `TableMetadata1` (via `table_id`) and `TableMetadata2` (via `foreign_ref_id`) both point
/// at `table_metadata`.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(has_many = "super::attribute_constraint_junction::Entity")]
    AttributeConstraintJunction,
    #[sea_orm(has_many = "super::attribute_foreign_constraint_junction::Entity")]
    AttributeForeignConstraintJunction,
    #[sea_orm(
        belongs_to = "super::index_metadata::Entity",
        from = "Column::IndexId",
        to = "super::index_metadata::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    IndexMetadata,
    #[sea_orm(
        belongs_to = "super::table_metadata::Entity",
        from = "Column::ForeignRefId",
        to = "super::table_metadata::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    TableMetadata2,
    #[sea_orm(
        belongs_to = "super::table_metadata::Entity",
        from = "Column::TableId",
        to = "super::table_metadata::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    TableMetadata1,
}

// The `Related<…>` targets below were stripped in extraction; each one is restored from the
// entity named by the matching `Relation` variant above.
impl Related<super::attribute_constraint_junction::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::AttributeConstraintJunction.def()
    }
}

impl Related<super::attribute_foreign_constraint_junction::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::AttributeForeignConstraintJunction.def()
    }
}

impl Related<super::index_metadata::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::IndexMetadata.def()
    }
}

impl ActiveModelBehavior for ActiveModel {}
| -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/bin_op_pred.rs: -------------------------------------------------------------------------------- 1 | use crate::common::nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}; 2 | 3 | /// TODO: documentation 4 | #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 5 | pub enum BinOpType { 6 | // numerical 7 | Add, 8 | Sub, 9 | Mul, 10 | Div, 11 | Mod, 12 | 13 | // comparison 14 | Eq, 15 | Neq, 16 | Gt, 17 | Lt, 18 | Geq, 19 | Leq, 20 | } 21 | 22 | impl std::fmt::Display for BinOpType { 23 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 24 | write!(f, "{:?}", self) 25 | } 26 | } 27 | 28 | impl BinOpType { 29 | pub fn is_numerical(&self) -> bool { 30 | matches!( 31 | self, 32 | Self::Add | Self::Sub | Self::Mul | Self::Div | Self::Mod 33 | ) 34 | } 35 | 36 | pub fn is_comparison(&self) -> bool { 37 | matches!( 38 | self, 39 | Self::Eq | Self::Neq | Self::Gt | Self::Lt | Self::Geq | Self::Leq 40 | ) 41 | } 42 | } 43 | 44 | #[derive(Clone, Debug)] 45 | pub struct BinOpPred(pub ArcPredicateNode); 46 | 47 | impl BinOpPred { 48 | pub fn new(left: ArcPredicateNode, right: ArcPredicateNode, op_type: BinOpType) -> Self { 49 | BinOpPred( 50 | PredicateNode { 51 | typ: PredicateType::BinOp(op_type), 52 | children: vec![left, right], 53 | data: None, 54 | } 55 | .into(), 56 | ) 57 | } 58 | 59 | pub fn left_child(&self) -> ArcPredicateNode { 60 | self.0.child(0) 61 | } 62 | 63 | pub fn right_child(&self) -> ArcPredicateNode { 64 | self.0.child(1) 65 | } 66 | 67 | pub fn op_type(&self) -> BinOpType { 68 | if let PredicateType::BinOp(op_type) = self.0.typ { 69 | op_type 70 | } else { 71 | panic!("not a bin op") 72 | } 73 | } 74 | } 75 | 76 | impl ReprPredicateNode for BinOpPred { 77 | fn into_pred_node(self) -> ArcPredicateNode { 78 | self.0 79 | } 80 | 81 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 82 | 
if !matches!(pred_node.typ, PredicateType::BinOp(_)) { 83 | return None; 84 | } 85 | Some(Self(pred_node)) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_predicate_children.rs: -------------------------------------------------------------------------------- 1 | /* 2 | Table predicate_children { 3 | parent_id integer [ref: > predicate.id] 4 | child_id integer [ref: > predicate.id] 5 | } 6 | */ 7 | 8 | use sea_orm_migration::{prelude::*, schema::integer}; 9 | 10 | use super::m20241029_000001_predicate::Predicate; 11 | 12 | #[derive(Iden)] 13 | pub enum PredicateChildren { 14 | Table, 15 | ParentId, 16 | ChildId, 17 | } 18 | 19 | #[derive(DeriveMigrationName)] 20 | pub struct Migration; 21 | 22 | #[async_trait::async_trait] 23 | impl MigrationTrait for Migration { 24 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 25 | manager 26 | .create_table( 27 | Table::create() 28 | .table(PredicateChildren::Table) 29 | .if_not_exists() 30 | .col(integer(PredicateChildren::ParentId)) 31 | .foreign_key( 32 | ForeignKey::create() 33 | .from(PredicateChildren::Table, PredicateChildren::ParentId) 34 | .to(Predicate::Table, Predicate::Id) 35 | .on_delete(ForeignKeyAction::Cascade) 36 | .on_update(ForeignKeyAction::Cascade), 37 | ) 38 | .col(integer(PredicateChildren::ChildId)) 39 | .foreign_key( 40 | ForeignKey::create() 41 | .from(PredicateChildren::Table, PredicateChildren::ChildId) 42 | .to(Predicate::Table, Predicate::Id) 43 | .on_delete(ForeignKeyAction::Cascade) 44 | .on_update(ForeignKeyAction::Cascade), 45 | ) 46 | .primary_key( 47 | Index::create() 48 | .col(PredicateChildren::ParentId) 49 | .col(PredicateChildren::ChildId), 50 | ) 51 | .to_owned(), 52 | ) 53 | .await 54 | } 55 | 56 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 57 | manager 58 | .drop_table(Table::drop().table(PredicateChildren::Table).to_owned()) 59 | .await 
60 | } 61 | } 62 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/types.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | /// TODO: Implement from and to methods for the following types to enable conversion 4 | /// to and from their persistent counterparts. 5 | 6 | /// TODO: documentation 7 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)] 8 | pub struct GroupId(pub u64); 9 | 10 | /// TODO: documentation 11 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)] 12 | pub struct ExprId(pub u64); 13 | 14 | /// TODO: documentation 15 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)] 16 | pub struct TableId(pub u64); 17 | 18 | /// TODO: documentation 19 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)] 20 | pub struct AttrId(pub u64); 21 | 22 | /// TODO: documentation 23 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default, Hash)] 24 | pub struct EpochId(pub u64); 25 | 26 | impl Display for GroupId { 27 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 28 | write!(f, "!{}", self.0) 29 | } 30 | } 31 | 32 | impl Display for ExprId { 33 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 34 | write!(f, "{}", self.0) 35 | } 36 | } 37 | 38 | impl Display for TableId { 39 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 40 | write!(f, "Table#{}", self.0) 41 | } 42 | } 43 | 44 | impl Display for AttrId { 45 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 46 | write!(f, "Attr#{}", self.0) 47 | } 48 | } 49 | 50 | impl Display for EpochId { 51 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 52 | write!(f, "Epoch#{}", self.0) 53 | } 54 | } 55 | 56 | impl From for i32 { 57 | fn from(id: GroupId) -> i32 { 58 | id.0 as 
i32 59 | } 60 | } 61 | 62 | impl From for i32 { 63 | fn from(id: ExprId) -> i32 { 64 | id.0 as i32 65 | } 66 | } 67 | 68 | impl From for i32 { 69 | fn from(id: TableId) -> i32 { 70 | id.0 as i32 71 | } 72 | } 73 | 74 | impl From for i32 { 75 | fn from(id: AttrId) -> i32 { 76 | id.0 as i32 77 | } 78 | } 79 | 80 | impl From for i32 { 81 | fn from(id: EpochId) -> i32 { 82 | id.0 as i32 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /optd-persistent/src/memo/expression.rs: -------------------------------------------------------------------------------- 1 | use crate::entities::*; 2 | use std::hash::{DefaultHasher, Hash, Hasher}; 3 | 4 | /// All of the different types of fixed logical operators. 5 | /// 6 | /// Note that there could be more operators that the memo table must support that are not enumerated 7 | /// in this enum, as there can be up to `2^16` different types of operators. 8 | #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)] 9 | #[non_exhaustive] 10 | #[repr(i16)] 11 | pub enum LogicalOperator { 12 | Scan, 13 | Join, 14 | } 15 | 16 | /// All of the different types of fixed physical operators. 17 | /// 18 | /// Note that there could be more operators that the memo table must support that are not enumerated 19 | /// in this enum, as there can be up to `2^16` different types of operators. 20 | #[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)] 21 | #[non_exhaustive] 22 | #[repr(i16)] 23 | pub enum PhysicalOperator { 24 | TableScan, 25 | IndexScan, 26 | NestedLoopJoin, 27 | HashJoin, 28 | } 29 | 30 | /// A method to generate a fingerprint used to efficiently check if two 31 | /// expressions are equivalent. 32 | /// 33 | /// TODO actually make efficient. 
34 | fn fingerprint(variant_tag: i16, data: &serde_json::Value) -> i64 { 35 | let mut hasher = DefaultHasher::new(); 36 | 37 | variant_tag.hash(&mut hasher); 38 | data.hash(&mut hasher); 39 | 40 | hasher.finish() as i64 41 | } 42 | 43 | impl logical_expression::Model { 44 | /// Creates a new logical expression with an unset `id` and `group_id`. 45 | pub fn new(variant_tag: LogicalOperator, data: serde_json::Value) -> Self { 46 | let tag = variant_tag as i16; 47 | let fingerprint = fingerprint(tag, &data); 48 | 49 | Self { 50 | id: 0, 51 | group_id: 0, 52 | fingerprint, 53 | variant_tag: tag, 54 | data, 55 | } 56 | } 57 | } 58 | 59 | impl physical_expression::Model { 60 | /// Creates a new physical expression with an unset `id` and `group_id`. 61 | pub fn new(variant_tag: PhysicalOperator, data: serde_json::Value) -> Self { 62 | let tag = variant_tag as i16; 63 | let fingerprint = fingerprint(tag, &data); 64 | 65 | Self { 66 | id: 0, 67 | group_id: 0, 68 | fingerprint, 69 | variant_tag: tag, 70 | data, 71 | } 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/cost_model/m20241029_000001_versioned_statistic.rs: -------------------------------------------------------------------------------- 1 | //! This table stores the versioned statistics. It includes all the histories of the statistics. 2 | //! 3 | //! If a statistic is updated/inserted, please insert one new row into this table. 4 | 5 | use crate::migrator::cost_model::{event::Event, statistic::Statistic}; 6 | use sea_orm_migration::{prelude::*, schema::*}; 7 | 8 | #[derive(Iden)] 9 | pub enum VersionedStatistic { 10 | Table, 11 | Id, 12 | EpochId, 13 | // Real statistic id. 
14 | StatisticId, 15 | StatisticValue, 16 | } 17 | 18 | #[derive(DeriveMigrationName)] 19 | pub struct Migration; 20 | 21 | #[async_trait::async_trait] 22 | impl MigrationTrait for Migration { 23 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 24 | manager 25 | .create_table( 26 | Table::create() 27 | .table(VersionedStatistic::Table) 28 | .if_not_exists() 29 | .col(pk_auto(VersionedStatistic::Id)) 30 | .col(integer(VersionedStatistic::EpochId)) 31 | .foreign_key( 32 | ForeignKey::create() 33 | .from(VersionedStatistic::Table, VersionedStatistic::EpochId) 34 | .to(Event::Table, Event::EpochId) 35 | .on_delete(ForeignKeyAction::Cascade) 36 | .on_update(ForeignKeyAction::Cascade), 37 | ) 38 | .col(integer(VersionedStatistic::StatisticId)) 39 | .foreign_key( 40 | ForeignKey::create() 41 | .from(VersionedStatistic::Table, VersionedStatistic::StatisticId) 42 | .to(Statistic::Table, Statistic::Id) 43 | .on_delete(ForeignKeyAction::Cascade) 44 | .on_update(ForeignKeyAction::Cascade), 45 | ) 46 | .col(json(VersionedStatistic::StatisticValue)) 47 | .to_owned(), 48 | ) 49 | .await 50 | } 51 | 52 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 53 | manager 54 | .drop_table(Table::drop().table(VersionedStatistic::Table).to_owned()) 55 | .await 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/attribute.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "attribute")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub table_id: i32, 11 | pub name: String, 12 | pub compression_method: String, 13 | pub variant_tag: i32, 14 | pub base_attribute_number: i32, 15 | pub is_not_null: bool, 16 | } 17 | 18 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 19 | pub enum Relation { 20 | #[sea_orm(has_many = "super::attribute_constraint_junction::Entity")] 21 | AttributeConstraintJunction, 22 | #[sea_orm(has_many = "super::attribute_foreign_constraint_junction::Entity")] 23 | AttributeForeignConstraintJunction, 24 | #[sea_orm(has_many = "super::statistic_to_attribute_junction::Entity")] 25 | StatisticToAttributeJunction, 26 | #[sea_orm( 27 | belongs_to = "super::table_metadata::Entity", 28 | from = "Column::TableId", 29 | to = "super::table_metadata::Column::Id", 30 | on_update = "Cascade", 31 | on_delete = "Cascade" 32 | )] 33 | TableMetadata, 34 | } 35 | 36 | impl Related for Entity { 37 | fn to() -> RelationDef { 38 | Relation::AttributeConstraintJunction.def() 39 | } 40 | } 41 | 42 | impl Related for Entity { 43 | fn to() -> RelationDef { 44 | Relation::AttributeForeignConstraintJunction.def() 45 | } 46 | } 47 | 48 | impl Related for Entity { 49 | fn to() -> RelationDef { 50 | Relation::StatisticToAttributeJunction.def() 51 | } 52 | } 53 | 54 | impl Related for Entity { 55 | fn to() -> RelationDef { 56 | Relation::TableMetadata.def() 57 | } 58 | } 59 | 60 | impl Related for Entity { 61 | fn to() -> RelationDef { 62 | super::statistic_to_attribute_junction::Relation::Statistic.def() 63 | } 64 | fn via() -> Option { 65 | Some( 66 | super::statistic_to_attribute_junction::Relation::Attribute 67 | .def() 68 | .rev(), 69 | ) 70 | } 71 | } 72 | 73 | impl ActiveModelBehavior for ActiveModel {} 74 | 
-------------------------------------------------------------------------------- /optd-persistent/src/entities/cascades_group.rs: -------------------------------------------------------------------------------- 1 | //! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "cascades_group")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub latest_winner: Option, 11 | pub in_progress: bool, 12 | pub is_optimized: bool, 13 | } 14 | 15 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 16 | pub enum Relation { 17 | #[sea_orm(has_many = "super::group_winner::Entity")] 18 | GroupWinner, 19 | #[sea_orm(has_many = "super::logical_children::Entity")] 20 | LogicalChildren, 21 | #[sea_orm(has_many = "super::logical_expression::Entity")] 22 | LogicalExpression, 23 | #[sea_orm(has_many = "super::logical_property::Entity")] 24 | LogicalProperty, 25 | #[sea_orm(has_many = "super::physical_children::Entity")] 26 | PhysicalChildren, 27 | #[sea_orm( 28 | belongs_to = "super::physical_expression::Entity", 29 | from = "Column::LatestWinner", 30 | to = "super::physical_expression::Column::Id", 31 | on_update = "Cascade", 32 | on_delete = "SetNull" 33 | )] 34 | PhysicalExpression, 35 | } 36 | 37 | impl Related for Entity { 38 | fn to() -> RelationDef { 39 | Relation::GroupWinner.def() 40 | } 41 | } 42 | 43 | impl Related for Entity { 44 | fn to() -> RelationDef { 45 | Relation::LogicalChildren.def() 46 | } 47 | } 48 | 49 | impl Related for Entity { 50 | fn to() -> RelationDef { 51 | Relation::LogicalProperty.def() 52 | } 53 | } 54 | 55 | impl Related for Entity { 56 | fn to() -> RelationDef { 57 | Relation::PhysicalChildren.def() 58 | } 59 | } 60 | 61 | impl Related for Entity { 62 | fn to() -> RelationDef { 63 | super::logical_children::Relation::LogicalExpression.def() 64 | } 65 | fn via() -> Option { 66 | 
Some(super::logical_children::Relation::CascadesGroup.def().rev()) 67 | } 68 | } 69 | 70 | impl Related for Entity { 71 | fn to() -> RelationDef { 72 | super::physical_children::Relation::PhysicalExpression.def() 73 | } 74 | fn via() -> Option { 75 | Some( 76 | super::physical_children::Relation::CascadesGroup 77 | .def() 78 | .rev(), 79 | ) 80 | } 81 | } 82 | 83 | impl ActiveModelBehavior for ActiveModel {} 84 | -------------------------------------------------------------------------------- /optd-cost-model/src/stats/arith_encoder.rs: -------------------------------------------------------------------------------- 1 | //! This module provides an encoder that converts alpha-numeric strings 2 | //! into f64 values, designed to maintain the natural ordering of strings. 3 | //! 4 | //! While the encoding is theoretically lossless, in practice, it may suffer 5 | //! from precision loss due to floating-point errors. 6 | //! 7 | //! Non-alpha-numeric characters are relegated to the end of the encoded value, 8 | //! rendering them indistinguishable from one another in this context. 9 | 10 | use std::{collections::HashMap, sync::LazyLock}; 11 | 12 | // The alphanumerical ordering. 13 | const ALPHANUMERIC_ORDER: [char; 95] = [ 14 | ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', 15 | '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~', '0', '1', '2', '3', '4', 16 | '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 17 | 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 18 | 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 19 | ]; 20 | 21 | const PMF: f64 = 1.0 / (ALPHANUMERIC_ORDER.len() as f64); 22 | 23 | static CDF: LazyLock> = LazyLock::new(|| { 24 | let length = ALPHANUMERIC_ORDER.len() + 1; // To account for non-alpha-numeric characters. 
25 | let mut cdf = HashMap::with_capacity(length); 26 | for (index, &char) in ALPHANUMERIC_ORDER.iter().enumerate() { 27 | cdf.insert(char, (index as f64) / (length as f64)); 28 | } 29 | cdf 30 | }); 31 | 32 | pub fn encode(string: &str) -> f64 { 33 | let mut left = 0.0; 34 | // 10_000.0 is fairly arbitrary. don't make it f64::MAX though because it causes overflow in 35 | // other places of the code 36 | let mut right = 10_000.0; 37 | 38 | for char in string.chars() { 39 | let cdf = CDF.get(&char).unwrap_or(&1.0); 40 | let distance = right - left; 41 | right = left + distance * (cdf + PMF); 42 | left += distance * cdf; 43 | } 44 | 45 | left 46 | } 47 | 48 | // Start of unit testing section. 49 | #[cfg(test)] 50 | mod tests { 51 | use super::encode; 52 | 53 | #[test] 54 | fn encode_tests() { 55 | assert!(encode("") < encode("abc")); 56 | assert!(encode("abc") < encode("bcd")); 57 | 58 | assert!(encode("a") < encode("aaa")); 59 | assert!(encode("!a") < encode("a!")); 60 | assert!(encode("Alexis") < encode("Schlomer")); 61 | 62 | assert!(encode("Gungnir Rules!") < encode("Schlomer")); 63 | assert!(encode("Gungnir Rules!") < encode("Schlomer")); 64 | 65 | assert_eq!(encode(" "), encode(" ")); 66 | assert_eq!(encode("Same"), encode("Same")); 67 | assert!(encode("Nicolas ") < encode("Nicolas💰💼")); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /optd-persistent/src/entities/statistic.rs: -------------------------------------------------------------------------------- 1 | //! 
`SeaORM` Entity, @generated by sea-orm-codegen 1.1.0 2 | 3 | use sea_orm::entity::prelude::*; 4 | 5 | #[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)] 6 | #[sea_orm(table_name = "statistic")] 7 | pub struct Model { 8 | #[sea_orm(primary_key)] 9 | pub id: i32, 10 | pub name: String, 11 | pub table_id: Option, 12 | pub creation_time: DateTimeUtc, 13 | pub number_of_attributes: i32, 14 | pub variant_tag: i32, 15 | pub description: String, 16 | } 17 | 18 | #[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)] 19 | pub enum Relation { 20 | #[sea_orm(has_many = "super::physical_expression_to_statistic_junction::Entity")] 21 | PhysicalExpressionToStatisticJunction, 22 | #[sea_orm(has_many = "super::statistic_to_attribute_junction::Entity")] 23 | StatisticToAttributeJunction, 24 | #[sea_orm( 25 | belongs_to = "super::table_metadata::Entity", 26 | from = "Column::TableId", 27 | to = "super::table_metadata::Column::Id", 28 | on_update = "Cascade", 29 | on_delete = "Cascade" 30 | )] 31 | TableMetadata, 32 | #[sea_orm(has_many = "super::versioned_statistic::Entity")] 33 | VersionedStatistic, 34 | } 35 | 36 | impl Related for Entity { 37 | fn to() -> RelationDef { 38 | Relation::PhysicalExpressionToStatisticJunction.def() 39 | } 40 | } 41 | 42 | impl Related for Entity { 43 | fn to() -> RelationDef { 44 | Relation::StatisticToAttributeJunction.def() 45 | } 46 | } 47 | 48 | impl Related for Entity { 49 | fn to() -> RelationDef { 50 | Relation::TableMetadata.def() 51 | } 52 | } 53 | 54 | impl Related for Entity { 55 | fn to() -> RelationDef { 56 | Relation::VersionedStatistic.def() 57 | } 58 | } 59 | 60 | impl Related for Entity { 61 | fn to() -> RelationDef { 62 | super::statistic_to_attribute_junction::Relation::Attribute.def() 63 | } 64 | fn via() -> Option { 65 | Some( 66 | super::statistic_to_attribute_junction::Relation::Statistic 67 | .def() 68 | .rev(), 69 | ) 70 | } 71 | } 72 | 73 | impl Related for Entity { 74 | fn to() -> RelationDef { 75 | 
super::physical_expression_to_statistic_junction::Relation::PhysicalExpression.def() 76 | } 77 | fn via() -> Option { 78 | Some( 79 | super::physical_expression_to_statistic_junction::Relation::Statistic 80 | .def() 81 | .rev(), 82 | ) 83 | } 84 | } 85 | 86 | impl ActiveModelBehavior for ActiveModel {} 87 | -------------------------------------------------------------------------------- /optd-cost-model/src/storage/mock.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused_variables, dead_code)] 2 | use std::collections::HashMap; 3 | 4 | use serde::{Deserialize, Serialize}; 5 | 6 | use crate::{ 7 | common::types::{EpochId, ExprId, TableId}, 8 | stats::AttributeCombValueStats, 9 | Cost, CostModelResult, EstimatedStatistic, 10 | }; 11 | 12 | use super::CostModelStorageManager; 13 | 14 | pub type AttrIndices = Vec; 15 | 16 | #[serde_with::serde_as] 17 | #[derive(Serialize, Deserialize, Debug)] 18 | pub struct TableStats { 19 | pub row_cnt: u64, 20 | #[serde_as(as = "HashMap")] 21 | pub column_comb_stats: HashMap, 22 | } 23 | 24 | impl TableStats { 25 | pub fn new( 26 | row_cnt: u64, 27 | column_comb_stats: HashMap, 28 | ) -> Self { 29 | Self { 30 | row_cnt, 31 | column_comb_stats, 32 | } 33 | } 34 | } 35 | 36 | pub type BaseTableStats = HashMap; 37 | 38 | pub struct CostModelStorageMockManagerImpl { 39 | pub(crate) per_table_stats_map: BaseTableStats, 40 | } 41 | 42 | impl CostModelStorageMockManagerImpl { 43 | pub fn new(per_table_stats_map: BaseTableStats) -> Self { 44 | Self { 45 | per_table_stats_map, 46 | } 47 | } 48 | } 49 | 50 | impl CostModelStorageManager for CostModelStorageMockManagerImpl { 51 | async fn get_attributes_comb_statistics( 52 | &self, 53 | table_id: TableId, 54 | attr_base_indices: &[u64], 55 | ) -> CostModelResult> { 56 | let table_stats = self.per_table_stats_map.get(&table_id); 57 | match table_stats { 58 | None => Ok(None), 59 | Some(table_stats) => match 
table_stats.column_comb_stats.get(attr_base_indices) { 60 | None => Ok(None), 61 | Some(stats) => Ok(Some(stats.clone())), 62 | }, 63 | } 64 | } 65 | 66 | async fn get_table_row_count(&self, table_id: TableId) -> CostModelResult> { 67 | let table_stats = self.per_table_stats_map.get(&table_id); 68 | Ok(table_stats.map(|stats| stats.row_cnt)) 69 | } 70 | 71 | /// TODO: finish this when implementing the cost get/store tests 72 | async fn get_cost( 73 | &self, 74 | expr_id: ExprId, 75 | ) -> CostModelResult<(Option, Option)> { 76 | todo!() 77 | } 78 | 79 | /// TODO: finish this when implementing the cost get/store tests 80 | async fn store_cost( 81 | &self, 82 | expr_id: ExprId, 83 | cost: Option, 84 | estimated_statistic: Option, 85 | epoch_id: Option, 86 | ) -> CostModelResult<()> { 87 | todo!() 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/m20241029_000001_constraint_metadata.rs: -------------------------------------------------------------------------------- 1 | use crate::migrator::catalog::{index_metadata::IndexMetadata, table_metadata::TableMetadata}; 2 | use sea_orm_migration::{prelude::*, schema::*}; 3 | 4 | #[derive(Iden)] 5 | pub enum ConstraintMetadata { 6 | Table, 7 | Id, 8 | Name, 9 | VariantTag, 10 | TableId, 11 | IndexId, 12 | ForeignRefId, 13 | CheckSrc, 14 | } 15 | 16 | #[derive(DeriveMigrationName)] 17 | pub struct Migration; 18 | 19 | #[async_trait::async_trait] 20 | impl MigrationTrait for Migration { 21 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 22 | manager 23 | .create_table( 24 | Table::create() 25 | .table(ConstraintMetadata::Table) 26 | .if_not_exists() 27 | .col(pk_auto(ConstraintMetadata::Id)) 28 | .col(string(ConstraintMetadata::Name)) 29 | .col(integer(ConstraintMetadata::VariantTag)) 30 | .col(integer_null(ConstraintMetadata::TableId)) 31 | .foreign_key( 32 | ForeignKey::create() 33 | .from(ConstraintMetadata::Table, 
ConstraintMetadata::TableId) 34 | .to(TableMetadata::Table, TableMetadata::Id) 35 | .on_delete(ForeignKeyAction::Cascade) 36 | .on_update(ForeignKeyAction::Cascade), 37 | ) 38 | .col(integer_null(ConstraintMetadata::IndexId)) 39 | .foreign_key( 40 | ForeignKey::create() 41 | .from(ConstraintMetadata::Table, ConstraintMetadata::IndexId) 42 | .to(IndexMetadata::Table, IndexMetadata::Id) 43 | .on_delete(ForeignKeyAction::Cascade) 44 | .on_update(ForeignKeyAction::Cascade), 45 | ) 46 | .col(integer_null(ConstraintMetadata::ForeignRefId)) 47 | .foreign_key( 48 | ForeignKey::create() 49 | .from(ConstraintMetadata::Table, ConstraintMetadata::ForeignRefId) 50 | .to(TableMetadata::Table, TableMetadata::Id) 51 | .on_delete(ForeignKeyAction::Cascade) 52 | .on_update(ForeignKeyAction::Cascade), 53 | ) 54 | .col(string(ConstraintMetadata::CheckSrc)) 55 | .to_owned(), 56 | ) 57 | .await 58 | } 59 | 60 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 61 | manager 62 | .drop_table(Table::drop().table(ConstraintMetadata::Table).to_owned()) 63 | .await 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_logical_children.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing the [`cascades_group`] children of every [`logical_expression`]. 2 | //! 3 | //! Formally, this entity is a junction which allows us to represent a many-to-many relationship 4 | //! between [`logical_expression`] and [`cascades_group`]. Expressions can have any number of child 5 | //! groups, and every group can be a child of many different expressions, hence the many-to-many 6 | //! relationship. 7 | //! 8 | //! See [`cascades_group`] for more details. 9 | //! 10 | //! [`cascades_group`]: super::cascades_group 11 | //! 
[`logical_expression`]: super::logical_expression 12 | 13 | use crate::migrator::memo::{cascades_group::CascadesGroup, logical_expression::LogicalExpression}; 14 | use sea_orm_migration::{prelude::*, schema::*}; 15 | 16 | #[derive(DeriveIden)] 17 | pub enum LogicalChildren { 18 | Table, 19 | LogicalExpressionId, 20 | GroupId, 21 | } 22 | 23 | #[derive(DeriveMigrationName)] 24 | pub struct Migration; 25 | 26 | #[async_trait::async_trait] 27 | impl MigrationTrait for Migration { 28 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 29 | manager 30 | .create_table( 31 | Table::create() 32 | .table(LogicalChildren::Table) 33 | .if_not_exists() 34 | .col(integer(LogicalChildren::LogicalExpressionId)) 35 | .col(integer(LogicalChildren::GroupId)) 36 | .primary_key( 37 | Index::create() 38 | .col(LogicalChildren::LogicalExpressionId) 39 | .col(LogicalChildren::GroupId), 40 | ) 41 | .foreign_key( 42 | ForeignKey::create() 43 | .from(LogicalChildren::Table, LogicalChildren::GroupId) 44 | .to(LogicalExpression::Table, LogicalExpression::Id) 45 | .on_delete(ForeignKeyAction::Cascade) 46 | .on_update(ForeignKeyAction::Cascade), 47 | ) 48 | .foreign_key( 49 | ForeignKey::create() 50 | .from(LogicalChildren::Table, LogicalChildren::GroupId) 51 | .to(CascadesGroup::Table, CascadesGroup::Id) 52 | .on_delete(ForeignKeyAction::Cascade) 53 | .on_update(ForeignKeyAction::Cascade), 54 | ) 55 | .to_owned(), 56 | ) 57 | .await 58 | } 59 | 60 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 61 | manager 62 | .drop_table(Table::drop().table(LogicalChildren::Table).to_owned()) 63 | .await 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/cost_model/m20241029_000001_statistic.rs: -------------------------------------------------------------------------------- 1 | //! This table stores the statistic infos. One sole statistic only has one row in this table. 2 | //! 3 | //! 
If we want to insert a new statistic, we should first insert one row into this table, then add a new 4 | //! event, and finally insert the statistic value into the versioned_statistic table. 5 | //! If we want to update a statistic, we should first find the real statistic id from this table, then 6 | //! add a new event, and finally insert the statistic value into the versioned_statistic table. 7 | 8 | use crate::migrator::catalog::m20241029_000001_table_metadata::TableMetadata; 9 | use sea_orm_migration::{prelude::*, schema::*}; 10 | 11 | #[derive(Iden)] 12 | pub enum Statistic { 13 | Table, 14 | Id, 15 | Name, 16 | // null if not a table statistic. 17 | TableId, 18 | CreationTime, 19 | // 0 if a table statistic. 20 | NumberOfAttributes, 21 | VariantTag, 22 | // Store the sorted attribute ids of this statistic, to support quick lookup (OR we can use junction table to look up) 23 | // For example, if we want to store the statistic of attributes [1, 2, 3], we can store it as "1,2,3". 24 | // During lookup, we should first sort the attribute ids, and then look up. 25 | // OR we can use statistic_to_attribute_junction table to look up. 
26 | Description, 27 | } 28 | 29 | #[derive(DeriveMigrationName)] 30 | pub struct Migration; 31 | 32 | #[async_trait::async_trait] 33 | impl MigrationTrait for Migration { 34 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 35 | manager 36 | .create_table( 37 | Table::create() 38 | .table(Statistic::Table) 39 | .if_not_exists() 40 | .col(pk_auto(Statistic::Id)) 41 | .col(string(Statistic::Name)) 42 | .col(integer_null(Statistic::TableId)) 43 | .foreign_key( 44 | ForeignKey::create() 45 | .from(Statistic::Table, Statistic::TableId) 46 | .to(TableMetadata::Table, TableMetadata::Id) 47 | .on_delete(ForeignKeyAction::Cascade) 48 | .on_update(ForeignKeyAction::Cascade), 49 | ) 50 | .col(timestamp(Statistic::CreationTime)) 51 | .col(integer(Statistic::NumberOfAttributes)) 52 | .col(integer(Statistic::VariantTag)) 53 | .col(string(Statistic::Description)) 54 | .to_owned(), 55 | ) 56 | .await 57 | } 58 | 59 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 60 | manager 61 | .drop_table(Table::drop().table(Statistic::Table).to_owned()) 62 | .await 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /optd-cost-model/src/common/predicates/log_op_pred.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Display; 2 | 3 | use crate::common::nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode}; 4 | 5 | use super::list_pred::ListPred; 6 | 7 | /// TODO: documentation 8 | #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] 9 | pub enum LogOpType { 10 | And, 11 | Or, 12 | } 13 | 14 | impl Display for LogOpType { 15 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 16 | write!(f, "{:?}", self) 17 | } 18 | } 19 | 20 | #[derive(Clone, Debug)] 21 | pub struct LogOpPred(pub ArcPredicateNode); 22 | 23 | impl LogOpPred { 24 | pub fn new(op_type: LogOpType, preds: Vec) -> Self { 25 | LogOpPred( 26 | PredicateNode 
{ 27 | typ: PredicateType::LogOp(op_type), 28 | children: preds, 29 | data: None, 30 | } 31 | .into(), 32 | ) 33 | } 34 | 35 | /// flatten_nested_logical is a helper function to flatten nested logical operators with same op 36 | /// type eg. (a AND (b AND c)) => ExprList([a, b, c]) 37 | /// (a OR (b OR c)) => ExprList([a, b, c]) 38 | /// It assume the children of the input expr_list are already flattened 39 | /// and can only be used in bottom up manner 40 | pub fn new_flattened_nested_logical(op: LogOpType, expr_list: ListPred) -> Self { 41 | // Since we assume that we are building the children bottom up, 42 | // there is no need to call flatten_nested_logical recursively 43 | let mut new_expr_list = Vec::new(); 44 | for child in expr_list.to_vec() { 45 | if let PredicateType::LogOp(child_op) = child.typ { 46 | if child_op == op { 47 | let child_log_op_expr = LogOpPred::from_pred_node(child).unwrap(); 48 | new_expr_list.extend(child_log_op_expr.children().to_vec()); 49 | continue; 50 | } 51 | } 52 | new_expr_list.push(child.clone()); 53 | } 54 | LogOpPred::new(op, new_expr_list) 55 | } 56 | 57 | pub fn children(&self) -> Vec { 58 | self.0.children.clone() 59 | } 60 | 61 | pub fn child(&self, idx: usize) -> ArcPredicateNode { 62 | self.0.child(idx) 63 | } 64 | 65 | pub fn op_type(&self) -> LogOpType { 66 | if let PredicateType::LogOp(op_type) = self.0.typ { 67 | op_type 68 | } else { 69 | panic!("not a log op") 70 | } 71 | } 72 | } 73 | 74 | impl ReprPredicateNode for LogOpPred { 75 | fn into_pred_node(self) -> ArcPredicateNode { 76 | self.0 77 | } 78 | 79 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 80 | if !matches!(pred_node.typ, PredicateType::LogOp(_)) { 81 | return None; 82 | } 83 | Some(Self(pred_node)) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_predicate_logical_expression_junction.rs: 
--------------------------------------------------------------------------------
/*
Table predicate_logical_expression_junction {
  logical_expr_id integer [ref: > logical_expression.id]
  predicate_id integer [ref: > predicate.id]
}
*/

use sea_orm_migration::{prelude::*, schema::integer};

use super::{
    m20241029_000001_logical_expression::LogicalExpression, m20241029_000001_predicate::Predicate,
};

/// Identifiers of the `predicate_logical_expression_junction` table and its columns.
#[derive(Iden)]
pub enum PredicateLogicalExpressionJunction {
    Table,
    LogicalExprId,
    PredicateId,
}

#[derive(DeriveMigrationName)]
pub struct Migration;

#[async_trait::async_trait]
impl MigrationTrait for Migration {
    /// Creates the junction table between `logical_expression` and `predicate` with the
    /// composite primary key `(logical_expr_id, predicate_id)`; both columns cascade on
    /// delete and update.
    async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
        manager
            .create_table(
                Table::create()
                    .table(PredicateLogicalExpressionJunction::Table)
                    // Matches the other migrations in this crate: creating the table is a
                    // no-op when it already exists instead of an error.
                    .if_not_exists()
                    .col(integer(PredicateLogicalExpressionJunction::LogicalExprId))
                    .foreign_key(
                        ForeignKey::create()
                            .from(
                                PredicateLogicalExpressionJunction::Table,
                                PredicateLogicalExpressionJunction::LogicalExprId,
                            )
                            .to(LogicalExpression::Table, LogicalExpression::Id)
                            .on_delete(ForeignKeyAction::Cascade)
                            .on_update(ForeignKeyAction::Cascade),
                    )
                    .col(integer(PredicateLogicalExpressionJunction::PredicateId))
                    .foreign_key(
                        ForeignKey::create()
                            .from(
                                PredicateLogicalExpressionJunction::Table,
                                PredicateLogicalExpressionJunction::PredicateId,
                            )
                            .to(Predicate::Table, Predicate::Id)
                            .on_delete(ForeignKeyAction::Cascade)
                            .on_update(ForeignKeyAction::Cascade),
                    )
                    .primary_key(
                        Index::create()
                            .col(PredicateLogicalExpressionJunction::LogicalExprId)
                            .col(PredicateLogicalExpressionJunction::PredicateId),
                    )
                    .to_owned(),
            )
            .await
    }

    /// Drops the `predicate_logical_expression_junction` table.
    async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
        manager
            .drop_table(
                Table::drop()
                    .table(PredicateLogicalExpressionJunction::Table)
                    .to_owned(),
            )
            .await
    }
}

--------------------------------------------------------------------------------
/optd-persistent/src/entities/physical_expression.rs:
--------------------------------------------------------------------------------
//! `SeaORM` Entity, @generated by sea-orm-codegen 1.1.0

use sea_orm::entity::prelude::*;

/// One row of the `physical_expression` table.
#[derive(Clone, Debug, PartialEq, DeriveEntityModel, Eq)]
#[sea_orm(table_name = "physical_expression")]
pub struct Model {
    #[sea_orm(primary_key)]
    pub id: i32,
    pub group_id: i32,
    pub fingerprint: i64,
    pub variant_tag: i16,
    pub data: Json,
}

/// Relations that start at `physical_expression`.
#[derive(Copy, Clone, Debug, EnumIter, DeriveRelation)]
pub enum Relation {
    #[sea_orm(
        belongs_to = "super::cascades_group::Entity",
        from = "Column::GroupId",
        to = "super::cascades_group::Column::Id",
        on_update = "Cascade",
        on_delete = "Cascade"
    )]
    CascadesGroup,
    #[sea_orm(has_many = "super::group_winner::Entity")]
    GroupWinner,
    #[sea_orm(has_many = "super::physical_children::Entity")]
    PhysicalChildren,
    #[sea_orm(has_many = "super::physical_expression_to_statistic_junction::Entity")]
    PhysicalExpressionToStatisticJunction,
    #[sea_orm(has_many = "super::physical_property::Entity")]
    PhysicalProperty,
    #[sea_orm(has_many = "super::plan_cost::Entity")]
    PlanCost,
}

// The `Related<T>` target types are restored from the relation each impl returns.

impl Related<super::group_winner::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::GroupWinner.def()
    }
}

impl Related<super::physical_children::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::PhysicalChildren.def()
    }
}

impl Related<super::physical_expression_to_statistic_junction::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::PhysicalExpressionToStatisticJunction.def()
    }
}

impl Related<super::physical_property::Entity> for Entity {
    fn to() -> RelationDef {
        Relation::PhysicalProperty.def()
    }
}
61
| 62 | impl Related for Entity { 63 | fn to() -> RelationDef { 64 | Relation::PlanCost.def() 65 | } 66 | } 67 | 68 | impl Related for Entity { 69 | fn to() -> RelationDef { 70 | super::physical_children::Relation::CascadesGroup.def() 71 | } 72 | fn via() -> Option { 73 | Some( 74 | super::physical_children::Relation::PhysicalExpression 75 | .def() 76 | .rev(), 77 | ) 78 | } 79 | } 80 | 81 | impl Related for Entity { 82 | fn to() -> RelationDef { 83 | super::physical_expression_to_statistic_junction::Relation::Statistic.def() 84 | } 85 | fn via() -> Option { 86 | Some( 87 | super::physical_expression_to_statistic_junction::Relation::PhysicalExpression 88 | .def() 89 | .rev(), 90 | ) 91 | } 92 | } 93 | 94 | impl ActiveModelBehavior for ActiveModel {} 95 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_physical_children.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing the [`cascades_group`] children of every [`physical_expression`]. 2 | //! 3 | //! Formally, this entity is a junction which allows us to represent a many-to-many relationship 4 | //! between [`physical_expression`] and [`cascades_group`]. Expressions can have any number of child 5 | //! groups, and every group can be a child of many different expressions, hence the many-to-many 6 | //! relationship. 7 | //! 8 | //! See [`cascades_group`] for more details. 9 | //! 10 | //! [`cascades_group`]: super::cascades_group 11 | //! 
[`physical_expression`]: super::physical_expression 12 | 13 | use crate::migrator::memo::{ 14 | cascades_group::CascadesGroup, physical_expression::PhysicalExpression, 15 | }; 16 | use sea_orm_migration::{prelude::*, schema::*}; 17 | 18 | #[derive(DeriveIden)] 19 | pub enum PhysicalChildren { 20 | Table, 21 | PhysicalExpressionId, 22 | GroupId, 23 | } 24 | 25 | #[derive(DeriveMigrationName)] 26 | pub struct Migration; 27 | 28 | #[async_trait::async_trait] 29 | impl MigrationTrait for Migration { 30 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 31 | manager 32 | .create_table( 33 | Table::create() 34 | .table(PhysicalChildren::Table) 35 | .if_not_exists() 36 | .col(integer(PhysicalChildren::PhysicalExpressionId)) 37 | .col(integer(PhysicalChildren::GroupId)) 38 | .primary_key( 39 | Index::create() 40 | .col(PhysicalChildren::PhysicalExpressionId) 41 | .col(PhysicalChildren::GroupId), 42 | ) 43 | .foreign_key( 44 | ForeignKey::create() 45 | .from( 46 | PhysicalChildren::Table, 47 | PhysicalChildren::PhysicalExpressionId, 48 | ) 49 | .to(PhysicalExpression::Table, PhysicalExpression::Id) 50 | .on_delete(ForeignKeyAction::Cascade) 51 | .on_update(ForeignKeyAction::Cascade), 52 | ) 53 | .foreign_key( 54 | ForeignKey::create() 55 | .from(PhysicalChildren::Table, PhysicalChildren::GroupId) 56 | .to(CascadesGroup::Table, CascadesGroup::Id) 57 | .on_delete(ForeignKeyAction::Cascade) 58 | .on_update(ForeignKeyAction::Cascade), 59 | ) 60 | .to_owned(), 61 | ) 62 | .await 63 | } 64 | 65 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 66 | manager 67 | .drop_table(Table::drop().table(PhysicalChildren::Table).to_owned()) 68 | .await 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /optd-cost-model/src/cost/join/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::common::{ 2 | nodes::{ArcPredicateNode, PredicateType, 
ReprPredicateNode}, 3 | predicates::{attr_index_pred::AttrIndexPred, bin_op_pred::BinOpType}, 4 | properties::attr_ref::{ 5 | AttrRef, AttrRefs, BaseTableAttrRef, GroupAttrRefs, SemanticCorrelation, 6 | }, 7 | }; 8 | 9 | pub mod core; 10 | pub mod hash_join; 11 | pub mod nested_loop_join; 12 | 13 | pub(crate) fn get_input_correlation( 14 | left_prop: GroupAttrRefs, 15 | right_prop: GroupAttrRefs, 16 | ) -> Option { 17 | SemanticCorrelation::merge( 18 | left_prop.output_correlation().cloned(), 19 | right_prop.output_correlation().cloned(), 20 | ) 21 | } 22 | 23 | /// Check if an expr_tree is a join condition, returning the join on attr ref pair if it is. 24 | /// The reason the check and the info are in the same function is because their code is almost 25 | /// identical. It only picks out equality conditions between two attribute refs on different 26 | /// tables 27 | pub(crate) fn get_on_attr_ref_pair( 28 | expr_tree: ArcPredicateNode, 29 | attr_refs: &AttrRefs, 30 | ) -> Option<(AttrIndexPred, AttrIndexPred)> { 31 | // 1. Check that it's equality 32 | if expr_tree.typ == PredicateType::BinOp(BinOpType::Eq) { 33 | let left_child = expr_tree.child(0); 34 | let right_child = expr_tree.child(1); 35 | // 2. Check that both sides are attribute refs 36 | if left_child.typ == PredicateType::AttrIndex && right_child.typ == PredicateType::AttrIndex 37 | { 38 | // 3. 
Check that both sides don't belong to the same table (if we don't know, that 39 | // means they don't belong) 40 | let left_attr_ref_expr = AttrIndexPred::from_pred_node(left_child) 41 | .expect("we already checked that the type is AttrRef"); 42 | let right_attr_ref_expr = AttrIndexPred::from_pred_node(right_child) 43 | .expect("we already checked that the type is AttrRef"); 44 | let left_attr_ref = &attr_refs[left_attr_ref_expr.attr_index() as usize]; 45 | let right_attr_ref = &attr_refs[right_attr_ref_expr.attr_index() as usize]; 46 | let is_same_table = if let ( 47 | AttrRef::BaseTableAttrRef(BaseTableAttrRef { 48 | table_id: left_table_id, 49 | .. 50 | }), 51 | AttrRef::BaseTableAttrRef(BaseTableAttrRef { 52 | table_id: right_table_id, 53 | .. 54 | }), 55 | ) = (left_attr_ref, right_attr_ref) 56 | { 57 | left_table_id == right_table_id 58 | } else { 59 | false 60 | }; 61 | if !is_same_table { 62 | Some((left_attr_ref_expr, right_attr_ref_expr)) 63 | } else { 64 | None 65 | } 66 | } else { 67 | None 68 | } 69 | } else { 70 | None 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/cost_model/m20241029_000001_statistic_to_attribute_junction.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing the relationship between [`statistic`] and [`attribute`]. 2 | //! 3 | //! One [`statistic`] can be associated with multiple [`attribute`]s, which denotes a joint 4 | //! statistic for the attributes. On the other hand, one [`attribute`] can be associated with 5 | //! multiple [`statistic`]s, since the attribute can be used in multiple statistics. 
6 | 7 | use crate::migrator::catalog::attribute::Attribute; 8 | use crate::migrator::cost_model::statistic::Statistic; 9 | use sea_orm_migration::{prelude::*, schema::*}; 10 | 11 | #[derive(Iden)] 12 | pub enum StatisticToAttributeJunction { 13 | Table, 14 | StatisticId, 15 | AttributeId, 16 | } 17 | 18 | #[derive(DeriveMigrationName)] 19 | pub struct Migration; 20 | 21 | #[async_trait::async_trait] 22 | impl MigrationTrait for Migration { 23 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 24 | manager 25 | .create_table( 26 | Table::create() 27 | .table(StatisticToAttributeJunction::Table) 28 | .if_not_exists() 29 | .col(integer(StatisticToAttributeJunction::StatisticId)) 30 | .col(integer(StatisticToAttributeJunction::AttributeId)) 31 | .primary_key( 32 | Index::create() 33 | .col(StatisticToAttributeJunction::StatisticId) 34 | .col(StatisticToAttributeJunction::AttributeId), 35 | ) 36 | .foreign_key( 37 | ForeignKey::create() 38 | .from( 39 | StatisticToAttributeJunction::Table, 40 | StatisticToAttributeJunction::StatisticId, 41 | ) 42 | .to(Statistic::Table, Statistic::Id) 43 | .on_delete(ForeignKeyAction::Cascade) 44 | .on_update(ForeignKeyAction::Cascade), 45 | ) 46 | .foreign_key( 47 | ForeignKey::create() 48 | .from( 49 | StatisticToAttributeJunction::Table, 50 | StatisticToAttributeJunction::AttributeId, 51 | ) 52 | .to(Attribute::Table, Attribute::Id) 53 | .on_delete(ForeignKeyAction::Cascade) 54 | .on_update(ForeignKeyAction::Cascade), 55 | ) 56 | .to_owned(), 57 | ) 58 | .await 59 | } 60 | 61 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 62 | manager 63 | .drop_table( 64 | Table::drop() 65 | .table(StatisticToAttributeJunction::Table) 66 | .to_owned(), 67 | ) 68 | .await 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/m20241029_000001_attribute_constraint_junction.rs: 
-------------------------------------------------------------------------------- 1 | //! An entity representing the relationship between [`attribute`] and [`constraint`]. 2 | //! 3 | //! If a constraint is a table constraint (including foreign keys, but not constraint triggers), 4 | //! the attributes that are constrained on are stored in the [`attribute_constraint_junction`]. 5 | //! 6 | //! One constraint might be associated with multiple attributes, for example, a composite primary key. 7 | 8 | use crate::migrator::catalog::{attribute::Attribute, constraint_metadata::ConstraintMetadata}; 9 | use sea_orm_migration::{prelude::*, schema::*}; 10 | 11 | #[derive(Iden)] 12 | pub enum AttributeConstraintJunction { 13 | Table, 14 | AttributeId, 15 | ConstraintId, 16 | } 17 | 18 | #[derive(DeriveMigrationName)] 19 | pub struct Migration; 20 | 21 | #[async_trait::async_trait] 22 | impl MigrationTrait for Migration { 23 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 24 | manager 25 | .create_table( 26 | Table::create() 27 | .table(AttributeConstraintJunction::Table) 28 | .if_not_exists() 29 | .col(integer(AttributeConstraintJunction::AttributeId)) 30 | .col(integer(AttributeConstraintJunction::ConstraintId)) 31 | .primary_key( 32 | Index::create() 33 | .col(AttributeConstraintJunction::AttributeId) 34 | .col(AttributeConstraintJunction::ConstraintId), 35 | ) 36 | .foreign_key( 37 | ForeignKey::create() 38 | .from( 39 | AttributeConstraintJunction::Table, 40 | AttributeConstraintJunction::AttributeId, 41 | ) 42 | .to(Attribute::Table, Attribute::Id) 43 | .on_delete(ForeignKeyAction::Cascade) 44 | .on_update(ForeignKeyAction::Cascade), 45 | ) 46 | .foreign_key( 47 | ForeignKey::create() 48 | .from( 49 | AttributeConstraintJunction::Table, 50 | AttributeConstraintJunction::ConstraintId, 51 | ) 52 | .to(ConstraintMetadata::Table, ConstraintMetadata::Id) 53 | .on_delete(ForeignKeyAction::Cascade) 54 | .on_update(ForeignKeyAction::Cascade), 55 | ) 56 | 
.to_owned(), 57 | ) 58 | .await 59 | } 60 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 61 | manager 62 | .drop_table( 63 | Table::drop() 64 | .table(AttributeConstraintJunction::Table) 65 | .to_owned(), 66 | ) 67 | .await 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_predicate_physical_expression_junction.rs: -------------------------------------------------------------------------------- 1 | /* 2 | Table predicate_physical_expression_junction { 3 | physical_expr_id integer [ref: > physical_expression.id] 4 | predicate_id integer [ref: > predicate.id] 5 | } 6 | */ 7 | 8 | use sea_orm_migration::{prelude::*, schema::integer}; 9 | 10 | use super::{ 11 | m20241029_000001_physical_expression::PhysicalExpression, m20241029_000001_predicate::Predicate, 12 | }; 13 | 14 | #[derive(Iden)] 15 | pub enum PredicatePhysicalExpressionJunction { 16 | Table, 17 | PhysicalExprId, 18 | PredicateId, 19 | } 20 | 21 | #[derive(DeriveMigrationName)] 22 | pub struct Migration; 23 | 24 | #[async_trait::async_trait] 25 | impl MigrationTrait for Migration { 26 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 27 | manager 28 | .create_table( 29 | Table::create() 30 | .table(PredicatePhysicalExpressionJunction::Table) 31 | .col(integer(PredicatePhysicalExpressionJunction::PhysicalExprId)) 32 | .foreign_key( 33 | ForeignKey::create() 34 | .name("predicate_physical_expression_junction_physical_expr_id_fkey") 35 | .from( 36 | PredicatePhysicalExpressionJunction::Table, 37 | PredicatePhysicalExpressionJunction::PhysicalExprId, 38 | ) 39 | .to(PhysicalExpression::Table, PhysicalExpression::Id) 40 | .on_delete(ForeignKeyAction::Cascade) 41 | .on_update(ForeignKeyAction::Cascade), 42 | ) 43 | .col(integer(PredicatePhysicalExpressionJunction::PredicateId)) 44 | .foreign_key( 45 | ForeignKey::create() 46 | 
.name("predicate_physical_expression_junction_predicate_id_fkey") 47 | .from( 48 | PredicatePhysicalExpressionJunction::Table, 49 | PredicatePhysicalExpressionJunction::PredicateId, 50 | ) 51 | .to(Predicate::Table, Predicate::Id) 52 | .on_delete(ForeignKeyAction::Cascade) 53 | .on_update(ForeignKeyAction::Cascade), 54 | ) 55 | .primary_key( 56 | Index::create() 57 | .col(PredicatePhysicalExpressionJunction::PhysicalExprId) 58 | .col(PredicatePhysicalExpressionJunction::PredicateId), 59 | ) 60 | .to_owned(), 61 | ) 62 | .await 63 | } 64 | 65 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 66 | manager 67 | .drop_table( 68 | Table::drop() 69 | .table(PredicatePhysicalExpressionJunction::Table) 70 | .to_owned(), 71 | ) 72 | .await 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /optd-persistent/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use std::sync::LazyLock; 4 | 5 | use sea_orm::*; 6 | use sea_orm_migration::prelude::*; 7 | 8 | use migrator::Migrator; 9 | 10 | pub mod entities; 11 | mod migrator; 12 | 13 | pub mod cost_model; 14 | pub use cost_model::interface::CostModelStorageLayer; 15 | 16 | mod memo; 17 | pub use memo::interface::Memo; 18 | 19 | /// The filename of the SQLite database for migration. 20 | pub const DATABASE_FILENAME: &str = "sqlite.db"; 21 | /// The URL of the SQLite database for migration. 22 | pub const DATABASE_URL: &str = "sqlite:./sqlite.db?mode=rwc"; 23 | 24 | /// The filename of the SQLite database for testing. 25 | pub const TEST_DATABASE_FILENAME: &str = "init.db"; 26 | /// The path of the SQLite database file for testing (resolved against the current working directory). 
27 | pub static TEST_DATABASE_FILE: LazyLock = LazyLock::new(|| { 28 | std::env::current_dir() 29 | .unwrap() 30 | .join("src") 31 | .join("db") 32 | .join(TEST_DATABASE_FILENAME) 33 | .to_str() 34 | .unwrap() 35 | .to_owned() 36 | }); 37 | /// The URL of the SQLite database for testing. 38 | pub static TEST_DATABASE_URL: LazyLock = 39 | LazyLock::new(|| get_sqlite_url(TEST_DATABASE_FILE.as_str())); 40 | 41 | fn get_sqlite_url(file: &str) -> String { 42 | format!("sqlite:{}?mode=rwc", file) 43 | } 44 | 45 | #[derive(Debug)] 46 | pub enum CostModelError { 47 | // TODO: Add more error types 48 | UnknownStatisticType, 49 | VersionedStatisticNotFound, 50 | CustomError(String), 51 | } 52 | 53 | /// TODO convert this to `thiserror` 54 | #[derive(Debug)] 55 | /// The different kinds of errors that might occur while running operations on a memo table. 56 | pub enum MemoError { 57 | UnknownGroup, 58 | UnknownLogicalExpression, 59 | UnknownPhysicalExpression, 60 | InvalidExpression, 61 | } 62 | 63 | /// TODO convert this to `thiserror` 64 | #[derive(Debug)] 65 | pub enum BackendError { 66 | Memo(MemoError), 67 | DatabaseError(DbErr), 68 | CostModel(CostModelError), 69 | BackendError(String), 70 | } 71 | 72 | impl From for CostModelError { 73 | fn from(value: String) -> Self { 74 | CostModelError::CustomError(value) 75 | } 76 | } 77 | 78 | impl From for BackendError { 79 | fn from(value: CostModelError) -> Self { 80 | BackendError::CostModel(value) 81 | } 82 | } 83 | 84 | impl From for BackendError { 85 | fn from(value: MemoError) -> Self { 86 | BackendError::Memo(value) 87 | } 88 | } 89 | 90 | impl From for BackendError { 91 | fn from(value: DbErr) -> Self { 92 | BackendError::DatabaseError(value) 93 | } 94 | } 95 | 96 | /// A type alias for a result with [`BackendError`] as the error type. 
97 | pub type StorageResult = Result; 98 | 99 | pub struct BackendManager { 100 | db: DatabaseConnection, 101 | } 102 | 103 | impl BackendManager { 104 | /// Creates a new `BackendManager`. 105 | pub async fn new(database_url: Option<&str>) -> StorageResult { 106 | Ok(Self { 107 | db: Database::connect(database_url.unwrap_or(DATABASE_URL)).await?, 108 | }) 109 | } 110 | } 111 | 112 | pub async fn migrate(db: &DatabaseConnection) -> Result<(), DbErr> { 113 | Migrator::refresh(db).await 114 | } 115 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/cost_model/m20241029_000001_physical_expression_to_statistic_junction.rs: -------------------------------------------------------------------------------- 1 | //! This table stores for a physical expression, which statistics are used, so we 2 | //! don't need to compute it again. It is especially useful for update_stats, where 3 | //! we need to invalidate all the costs based on the physical_expression_id, so we 4 | //! need to use this table to get the physical_expression_id via statistic_id. 5 | //! 6 | //! **NOTE:** When we compute the cost for a physical expression, we should also 7 | //! insert related mappings into this table. 
8 | 9 | use crate::migrator::cost_model::statistic::Statistic; 10 | use crate::migrator::memo::physical_expression::PhysicalExpression; 11 | 12 | use sea_orm_migration::{prelude::*, schema::*}; 13 | 14 | #[derive(Iden)] 15 | pub enum PhysicalExpressionToStatisticJunction { 16 | Table, 17 | PhysicalExpressionId, 18 | StatisticId, 19 | } 20 | 21 | #[derive(DeriveMigrationName)] 22 | pub struct Migration; 23 | 24 | #[async_trait::async_trait] 25 | impl MigrationTrait for Migration { 26 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 27 | manager 28 | .create_table( 29 | Table::create() 30 | .table(PhysicalExpressionToStatisticJunction::Table) 31 | .if_not_exists() 32 | .col(integer( 33 | PhysicalExpressionToStatisticJunction::PhysicalExpressionId, 34 | )) 35 | .col(integer(PhysicalExpressionToStatisticJunction::StatisticId)) 36 | .primary_key( 37 | Index::create() 38 | .col(PhysicalExpressionToStatisticJunction::PhysicalExpressionId) 39 | .col(PhysicalExpressionToStatisticJunction::StatisticId), 40 | ) 41 | .foreign_key( 42 | ForeignKey::create() 43 | .from( 44 | PhysicalExpressionToStatisticJunction::Table, 45 | PhysicalExpressionToStatisticJunction::PhysicalExpressionId, 46 | ) 47 | .to(PhysicalExpression::Table, PhysicalExpression::Id) 48 | .on_delete(ForeignKeyAction::Cascade) 49 | .on_update(ForeignKeyAction::Cascade), 50 | ) 51 | .foreign_key( 52 | ForeignKey::create() 53 | .from( 54 | PhysicalExpressionToStatisticJunction::Table, 55 | PhysicalExpressionToStatisticJunction::StatisticId, 56 | ) 57 | .to(Statistic::Table, Statistic::Id) 58 | .on_delete(ForeignKeyAction::Cascade) 59 | .on_update(ForeignKeyAction::Cascade), 60 | ) 61 | .to_owned(), 62 | ) 63 | .await 64 | } 65 | 66 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 67 | manager 68 | .drop_table( 69 | Table::drop() 70 | .table(PhysicalExpressionToStatisticJunction::Table) 71 | .to_owned(), 72 | ) 73 | .await 74 | } 75 | } 76 | 
-------------------------------------------------------------------------------- /optd-persistent/src/migrator/catalog/m20241029_000001_attribute_foreign_constraint_junction.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing the relationship between [`attribute`] and [`constraint`]. 2 | //! 3 | //! If a constraint is a foreign key constraint, the attributes that are referenced by the foreign 4 | //! key are stored in the [`attribute_foreign_constraint_junction`]. Note that this is different from 5 | //! the [`attribute_constraint_junction`] table, which stores the attributes that are constrained on. 6 | //! In the case of a foreign key constraint, the latter holds the referencing attributes of the 7 | //! foreign key. 8 | //! 9 | //! One foreign key constraint might be associated with multiple attributes, for example, a composite 10 | //! foreign key. 11 | 12 | use crate::migrator::catalog::{attribute::Attribute, constraint_metadata::ConstraintMetadata}; 13 | use sea_orm_migration::{prelude::*, schema::*}; 14 | 15 | #[derive(Iden)] 16 | pub enum AttributeForeignConstraintJunction { 17 | Table, 18 | AttributeId, 19 | ConstraintId, 20 | } 21 | 22 | #[derive(DeriveMigrationName)] 23 | pub struct Migration; 24 | 25 | #[async_trait::async_trait] 26 | impl MigrationTrait for Migration { 27 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 28 | manager 29 | .create_table( 30 | Table::create() 31 | .table(AttributeForeignConstraintJunction::Table) 32 | .if_not_exists() 33 | .col(integer(AttributeForeignConstraintJunction::AttributeId)) 34 | .col(integer(AttributeForeignConstraintJunction::ConstraintId)) 35 | .primary_key( 36 | Index::create() 37 | .col(AttributeForeignConstraintJunction::AttributeId) 38 | .col(AttributeForeignConstraintJunction::ConstraintId), 39 | ) 40 | .foreign_key( 41 | ForeignKey::create() 42 | .from( 43 | AttributeForeignConstraintJunction::Table, 44 | 
AttributeForeignConstraintJunction::AttributeId, 45 | ) 46 | .to(Attribute::Table, Attribute::Id) 47 | .on_delete(ForeignKeyAction::Cascade) 48 | .on_update(ForeignKeyAction::Cascade), 49 | ) 50 | .foreign_key( 51 | ForeignKey::create() 52 | .from( 53 | AttributeForeignConstraintJunction::Table, 54 | AttributeForeignConstraintJunction::ConstraintId, 55 | ) 56 | .to(ConstraintMetadata::Table, ConstraintMetadata::Id) 57 | .on_delete(ForeignKeyAction::Cascade) 58 | .on_update(ForeignKeyAction::Cascade), 59 | ) 60 | .to_owned(), 61 | ) 62 | .await 63 | } 64 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 65 | manager 66 | .drop_table( 67 | Table::drop() 68 | .table(AttributeForeignConstraintJunction::Table) 69 | .to_owned(), 70 | ) 71 | .await 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/cost_model/m20241029_000001_plan_cost.rs: -------------------------------------------------------------------------------- 1 | //! When a statistic is updated, then all the related costs should be invalidated. (IsValid is set to false) 2 | //! This design (using IsValid flag) is based on the assumption that update_stats will not be called very frequently. 3 | //! It favors the compute_cost performance over the update_stats performance. 4 | //! 5 | //! This file stores cost like compute_cost, io_cost, network_cost, etc. for each physical expression. It also 6 | //! stores the estimated output row count (estimated statistic) of each physical expression. 7 | //! Sometimes we only have one of them to store, so we make Cost and EstimatedStatistic optional. But 8 | //! one record must have at least one of them. 9 | //! 10 | //! TODO: Ideally, we can separate them since sometimes we only have the estimated output row count to store, 11 | //! (when calling `derive_statistic`) but we don't have the detailed cost. 
12 | 13 | use crate::migrator::cost_model::event::Event; 14 | use crate::migrator::memo::physical_expression::PhysicalExpression; 15 | use sea_orm_migration::{prelude::*, schema::*}; 16 | 17 | #[derive(Iden)] 18 | pub enum PlanCost { 19 | Table, 20 | Id, 21 | PhysicalExpressionId, 22 | EpochId, 23 | // It is json type, including computation cost, I/O cost, etc. 24 | Cost, 25 | // Raw estimated output row count of this expression 26 | EstimatedStatistic, 27 | // Whether the cost is valid or not. If the latest cost for an expr is invalid, then we need to recompute the cost. 28 | // We need to invalidate the cost when the related stats are updated. 29 | IsValid, 30 | } 31 | 32 | #[derive(DeriveMigrationName)] 33 | pub struct Migration; 34 | 35 | #[async_trait::async_trait] 36 | impl MigrationTrait for Migration { 37 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 38 | manager 39 | .create_table( 40 | Table::create() 41 | .table(PlanCost::Table) 42 | .if_not_exists() 43 | .col(pk_auto(PlanCost::Id)) 44 | .col(integer(PlanCost::PhysicalExpressionId)) 45 | .foreign_key( 46 | ForeignKey::create() 47 | .from(PlanCost::Table, PlanCost::PhysicalExpressionId) 48 | .to(PhysicalExpression::Table, PhysicalExpression::Id) 49 | .on_delete(ForeignKeyAction::Cascade) 50 | .on_update(ForeignKeyAction::Cascade), 51 | ) 52 | .col(integer(PlanCost::EpochId)) 53 | .foreign_key( 54 | ForeignKey::create() 55 | .from(PlanCost::Table, PlanCost::EpochId) 56 | .to(Event::Table, Event::EpochId) 57 | .on_delete(ForeignKeyAction::Cascade) 58 | .on_update(ForeignKeyAction::Cascade), 59 | ) 60 | .col(json_null(PlanCost::Cost)) 61 | .col(float_null(PlanCost::EstimatedStatistic)) 62 | .col(boolean(PlanCost::IsValid)) 63 | .to_owned(), 64 | ) 65 | .await 66 | } 67 | 68 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 69 | manager 70 | .drop_table(Table::drop().table(PlanCost::Table).to_owned()) 71 | .await 72 | } 73 | } 74 | 
-------------------------------------------------------------------------------- /optd-cost-model/src/common/nodes.rs: -------------------------------------------------------------------------------- 1 | use core::fmt; 2 | use std::{fmt::Display, sync::Arc}; 3 | 4 | use arrow_schema::DataType; 5 | 6 | use super::{ 7 | predicates::{ 8 | bin_op_pred::BinOpType, constant_pred::ConstantType, func_pred::FuncType, 9 | log_op_pred::LogOpType, sort_order_pred::SortOrderType, un_op_pred::UnOpType, 10 | }, 11 | values::Value, 12 | }; 13 | 14 | /// TODO: documentation 15 | #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] 16 | pub enum JoinType { 17 | Inner = 1, 18 | FullOuter, 19 | LeftOuter, 20 | RightOuter, 21 | Cross, 22 | LeftSemi, 23 | RightSemi, 24 | LeftAnti, 25 | RightAnti, 26 | } 27 | 28 | impl Display for JoinType { 29 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 30 | write!(f, "{:?}", self) 31 | } 32 | } 33 | 34 | /// TODO: documentation 35 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 36 | pub enum PhysicalNodeType { 37 | PhysicalProjection, 38 | PhysicalFilter, 39 | PhysicalScan, 40 | PhysicalSort, 41 | PhysicalAgg, 42 | PhysicalHashJoin(JoinType), 43 | PhysicalNestedLoopJoin(JoinType), 44 | PhysicalEmptyRelation, 45 | PhysicalLimit, 46 | } 47 | 48 | impl std::fmt::Display for PhysicalNodeType { 49 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 50 | write!(f, "{:?}", self) 51 | } 52 | } 53 | 54 | /// TODO: documentation 55 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 56 | pub enum PredicateType { 57 | List, 58 | Constant(ConstantType), 59 | AttrIndex, 60 | UnOp(UnOpType), 61 | BinOp(BinOpType), 62 | LogOp(LogOpType), 63 | Func(FuncType), 64 | SortOrder(SortOrderType), 65 | Between, 66 | Cast, 67 | Like, 68 | DataType(DataType), 69 | InList, 70 | } 71 | 72 | impl std::fmt::Display for PredicateType { 73 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 74 | write!(f, "{:?}", self) 75 | } 76 | 
} 77 | 78 | pub type ArcPredicateNode = Arc; 79 | 80 | /// TODO: documentation 81 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] 82 | pub struct PredicateNode { 83 | /// A generic predicate node type 84 | pub typ: PredicateType, 85 | /// Child predicate nodes, always materialized 86 | pub children: Vec, 87 | /// Data associated with the predicate, if any 88 | pub data: Option, 89 | } 90 | 91 | impl std::fmt::Display for PredicateNode { 92 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 93 | write!(f, "({}", self.typ)?; 94 | for child in &self.children { 95 | write!(f, " {}", child)?; 96 | } 97 | if let Some(data) = &self.data { 98 | write!(f, " {}", data)?; 99 | } 100 | write!(f, ")") 101 | } 102 | } 103 | 104 | impl PredicateNode { 105 | pub fn child(&self, idx: usize) -> ArcPredicateNode { 106 | self.children[idx].clone() 107 | } 108 | 109 | pub fn unwrap_data(&self) -> Value { 110 | self.data.clone().unwrap() 111 | } 112 | } 113 | pub trait ReprPredicateNode: 'static + Clone { 114 | fn into_pred_node(self) -> ArcPredicateNode; 115 | 116 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option; 117 | } 118 | 119 | impl ReprPredicateNode for ArcPredicateNode { 120 | fn into_pred_node(self) -> ArcPredicateNode { 121 | self 122 | } 123 | 124 | fn from_pred_node(pred_node: ArcPredicateNode) -> Option { 125 | Some(pred_node) 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_logical_expression.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing a logical plan expression in the Cascades framework. 2 | //! 3 | //! Quoted from the Microsoft article _Extensible query optimizers in practice_: 4 | //! 5 | //! > A logical expression is defined as a tree of logical operators, and corresponds to a 6 | //! > relational algebraic expression. 7 | //! 8 | //! 
In the Cascades query optimization framework, the memo table stores equivalence classes of 9 | //! expressions (see [`cascades_group`]). These equivalence classes, or "groups", store both 10 | //! `logical_expression`s and [`physical_expression`]s. 11 | //! 12 | //! Optimization starts by "exploring" equivalent logical expressions within a group. For example, 13 | //! the logical expressions `Join(A, B)` and `Join(B, A)` are contained in the same group. The 14 | //! logical expressions are defined as a `Join` operator with the groups representing a scan of 15 | //! table `A` and a scan of table `B` as its children. 16 | //! 17 | //! # Columns 18 | //! 19 | //! Each `logical_expression` has a unique primary key ID, but it holds little importance other than 20 | //! helping distinguish between two different expressions. 21 | //! 22 | //! The more interesting column is the `fingerprint` column, in which we store a hashed fingerprint 23 | //! value that can be used to efficiently check equality between two potentially equivalent logical 24 | //! expressions (hash-consing). See ???TODO??? for more information on expression fingerprints. 25 | //! 26 | //! Finally, since there are many different types of operators, we store a variant tag and a data 27 | //! column as JSON to represent the semi-structured data fields of logical operators. 28 | //! 29 | //! # Entity Relationships 30 | //! 31 | //! The only relationship that `logical_expression` has is to [`cascades_group`]. It has **both** a 32 | //! one-to-many **and** a many-to-many relationship with [`cascades_group`], and you can see more 33 | //! details about this in the module-level documentation for [`cascades_group`]. 34 | //! 35 | //! [`cascades_group`]: super::cascades_group 36 | //! 
[`physical_expression`]: super::physical_expression 37 | 38 | use crate::migrator::memo::cascades_group::CascadesGroup; 39 | use sea_orm_migration::{prelude::*, schema::*}; 40 | 41 | #[derive(DeriveIden)] 42 | pub enum LogicalExpression { 43 | Table, 44 | Id, 45 | GroupId, 46 | Fingerprint, 47 | VariantTag, 48 | } 49 | 50 | #[derive(DeriveMigrationName)] 51 | pub struct Migration; 52 | 53 | #[async_trait::async_trait] 54 | impl MigrationTrait for Migration { 55 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 56 | manager 57 | .create_table( 58 | Table::create() 59 | .table(LogicalExpression::Table) 60 | .if_not_exists() 61 | .col(pk_auto(LogicalExpression::Id)) 62 | .col(integer(LogicalExpression::GroupId)) 63 | .foreign_key( 64 | ForeignKey::create() 65 | .from(LogicalExpression::Table, LogicalExpression::GroupId) 66 | .to(CascadesGroup::Table, CascadesGroup::Id) 67 | .on_delete(ForeignKeyAction::Cascade) 68 | .on_update(ForeignKeyAction::Cascade), 69 | ) 70 | .col(big_unsigned(LogicalExpression::Fingerprint)) 71 | .col(small_integer(LogicalExpression::VariantTag)) 72 | .to_owned(), 73 | ) 74 | .await 75 | } 76 | 77 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 78 | manager 79 | .drop_table(Table::drop().table(LogicalExpression::Table).to_owned()) 80 | .await 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /optd-persistent/src/main.rs: -------------------------------------------------------------------------------- 1 | //! Very basic demo of using the ORM for optd-persistent. 2 | //! 3 | //! You may run into errors when you first clone this repository. 4 | //! See the `README.md` for setup instructions. 
5 | 6 | #![allow(dead_code, unused_imports)] 7 | 8 | use sea_orm::*; 9 | use sea_orm_migration::prelude::*; 10 | use serde_json::json; 11 | 12 | mod entities; 13 | mod migrator; 14 | 15 | use entities::{prelude::*, *}; 16 | use optd_persistent::DATABASE_URL; 17 | 18 | #[tokio::main] 19 | async fn main() { 20 | basic_demo().await; 21 | memo_demo().await; 22 | } 23 | 24 | async fn memo_demo() { 25 | let _db = Database::connect(DATABASE_URL).await.unwrap(); 26 | 27 | todo!() 28 | } 29 | 30 | async fn basic_demo() { 31 | let db = Database::connect(DATABASE_URL).await.unwrap(); 32 | 33 | // Create a new `CascadesGroup`. 34 | let group = cascades_group::ActiveModel { 35 | latest_winner: ActiveValue::Set(None), 36 | in_progress: ActiveValue::Set(false), 37 | is_optimized: ActiveValue::Set(false), 38 | ..Default::default() 39 | } 40 | .save(&db) 41 | .await 42 | .unwrap(); 43 | 44 | // Create a new logical expression. 45 | let l_expr = logical_expression::ActiveModel { 46 | group_id: group.id.clone(), 47 | fingerprint: ActiveValue::Set(42), // Example fingerprint 48 | variant_tag: ActiveValue::Set(1), // Example variant tag 49 | data: ActiveValue::Set(json!({ // Example operator 50 | "type": "Scan", 51 | "table": "lineitem", 52 | "predicate": "l_quantity < 10", 53 | })), 54 | ..Default::default() 55 | } 56 | .save(&db) 57 | .await 58 | .unwrap(); 59 | 60 | // Create a link between the group and the logical expression in the junction table. 61 | let _link = logical_children::ActiveModel { 62 | group_id: group.id.clone(), 63 | logical_expression_id: l_expr.id.clone(), 64 | } 65 | .insert(&db) 66 | .await 67 | .unwrap(); 68 | 69 | // Basic lookup test on each table. 
70 | { 71 | let groups: Vec = CascadesGroup::find().all(&db).await.unwrap(); 72 | assert_eq!(groups.len(), 1); 73 | 74 | let l_expressions: Vec = 75 | LogicalExpression::find().all(&db).await.unwrap(); 76 | assert_eq!(l_expressions.len(), 1); 77 | } 78 | 79 | // Retrieve all logical expressions that belong to this group with lazy loading. 80 | { 81 | let group = CascadesGroup::find_by_id(*group.id.try_as_ref().unwrap()) 82 | .one(&db) 83 | .await 84 | .unwrap() 85 | .unwrap(); 86 | 87 | let group_expressions: Vec = group 88 | .find_related(LogicalExpression) 89 | .all(&db) 90 | .await 91 | .unwrap(); 92 | assert_eq!(group_expressions.len(), 1); 93 | } 94 | 95 | // Retrieve all logical expressions that belong to this group with eager loading. 96 | { 97 | let group_with_expressions: Vec<(cascades_group::Model, Vec)> = 98 | CascadesGroup::find() 99 | .find_with_related(LogicalExpression) 100 | .all(&db) 101 | .await 102 | .unwrap(); 103 | assert_eq!(group_with_expressions.len(), 1); 104 | assert_eq!(group_with_expressions[0].1.len(), 1); 105 | } 106 | 107 | // Clean up everything. Since everything is cascading, we only need to manually delete the group 108 | // and then SeaORM will take care of the expression and the junction. 109 | group.delete(&db).await.unwrap(); 110 | 111 | println!("Demo Finished!"); 112 | } 113 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_physical_expression.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing a logical plan expression in the Cascades framework. 2 | //! 3 | //! Quoted from the Microsoft article _Extensible query optimizers in practice_: 4 | //! 5 | //! > A physical expression is a tree of physical operators, which is also referred to as the 6 | //! > _physical plan_ or simply _plan_. 7 | //! 8 | //! 
In the Cascades query optimization framework, the memo table stores equivalence classes of 9 | //! expressions (see [`cascades_group`]). These equivalence classes, or "groups", store both 10 | //! [`logical_expression`]s and `physical_expression`s. 11 | //! 12 | //! Optimization starts by exploring equivalent logical expressions within a group, and then it 13 | //! proceeds to implement / optimize those logical operators into physical operators. For example, 14 | //! the logical expression `Join(A, B)` could be implemented into a `HashJoin(A, B)` or a 15 | //! `NestedLoopJoin(A, B)`, and both of these new physical expressions would be contained in the 16 | //! same group. 17 | //! 18 | //! # Columns 19 | //! 20 | //! Each `physical_expression` has a unique primary key ID, and other tables will store a foreign 21 | //! key reference to a specific `physical_expression`. 22 | //! 23 | //! The more interesting column is the `fingerprint` column, in which we store a hashed fingerprint 24 | //! value that can be used to efficiently check equality between two potentially equivalent physical 25 | //! expressions (hash-consing). See ???TODO??? for more information on expression fingerprints. 26 | //! 27 | //! Finally, since there are many different types of operators, we store a variant tag and a data 28 | //! column as JSON to represent the semi-structured data fields of physical operators. 29 | //! 30 | //! # Entity Relationships 31 | //! 32 | //! The only relationship that `physical_expression` has is to [`cascades_group`]. It has **both** a 33 | //! one-to-many **and** a many-to-many relationship with [`cascades_group`], and you can see more 34 | //! details about this in the module-level documentation for [`cascades_group`]. 35 | //! 36 | //! [`cascades_group`]: super::cascades_group 37 | //! 
[`logical_expression`]: super::logical_expression 38 | 39 | use crate::migrator::memo::cascades_group::CascadesGroup; 40 | use sea_orm_migration::{prelude::*, schema::*}; 41 | 42 | #[derive(DeriveIden)] 43 | pub enum PhysicalExpression { 44 | Table, 45 | Id, 46 | GroupId, 47 | Fingerprint, 48 | VariantTag, 49 | } 50 | 51 | #[derive(DeriveMigrationName)] 52 | pub struct Migration; 53 | 54 | #[async_trait::async_trait] 55 | impl MigrationTrait for Migration { 56 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 57 | manager 58 | .create_table( 59 | Table::create() 60 | .table(PhysicalExpression::Table) 61 | .if_not_exists() 62 | .col(pk_auto(PhysicalExpression::Id)) 63 | .col(integer(PhysicalExpression::GroupId)) 64 | .foreign_key( 65 | ForeignKey::create() 66 | .from(PhysicalExpression::Table, PhysicalExpression::GroupId) 67 | .to(CascadesGroup::Table, CascadesGroup::Id) 68 | .on_delete(ForeignKeyAction::Cascade) 69 | .on_update(ForeignKeyAction::Cascade), 70 | ) 71 | .col(big_unsigned(PhysicalExpression::Fingerprint)) 72 | .col(small_integer(PhysicalExpression::VariantTag)) 73 | .to_owned(), 74 | ) 75 | .await 76 | } 77 | 78 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 79 | manager 80 | .drop_table(Table::drop().table(PhysicalExpression::Table).to_owned()) 81 | .await 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /optd-cost-model/src/utils.rs: -------------------------------------------------------------------------------- 1 | //! optd's implementation of disjoint sets (union finds). It's send + sync + serializable. 
use std::{collections::HashMap, hash::Hash};

/// A disjoint-set (union-find) structure over values of type `T`.
///
/// Every registered value is assigned the next free index into `parents`. `parents[i]` holds the
/// parent index of element `i`; an element that is its own parent is the root of its set.
#[derive(Clone)]
pub struct DisjointSets<T> {
    /// Maps each registered value to its index in `parents`.
    data_idx: HashMap<T, usize>,
    /// Parent links, indexed by element index.
    parents: Vec<usize>,
}

// Implemented by hand so that an empty structure is available without requiring `T: Default`.
impl<T> Default for DisjointSets<T> {
    fn default() -> Self {
        Self {
            data_idx: HashMap::new(),
            parents: Vec::new(),
        }
    }
}

impl<T> std::fmt::Debug for DisjointSets<T> {
    /// Prints only the type name; the contents are intentionally not rendered.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "DisjointSets")
    }
}

impl<T: Eq + Hash> DisjointSets<T> {
    /// Creates an empty structure with no registered values.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `true` if `data` has been registered with [`make_set`](Self::make_set).
    pub fn contains(&self, data: &T) -> bool {
        self.data_idx.contains_key(data)
    }

    /// Registers `data` as a new singleton set.
    ///
    /// Returns `None` (and changes nothing) if `data` is already registered, `Some(())` otherwise.
    #[must_use]
    pub fn make_set(&mut self, data: T) -> Option<()> {
        // The entry API hashes `data` once and takes ownership, so no clone is needed.
        match self.data_idx.entry(data) {
            std::collections::hash_map::Entry::Occupied(_) => None,
            std::collections::hash_map::Entry::Vacant(slot) => {
                let idx = self.parents.len();
                slot.insert(idx);
                self.parents.push(idx);
                Some(())
            }
        }
    }

    /// Returns the root of `idx`, re-pointing every visited element at its grandparent on the
    /// way up so that later lookups walk a shorter chain.
    fn find(&mut self, mut idx: usize) -> usize {
        while self.parents[idx] != idx {
            self.parents[idx] = self.parents[self.parents[idx]];
            idx = self.parents[idx];
        }
        idx
    }

    /// Returns the root of `idx` without modifying the structure.
    fn find_const(&self, mut idx: usize) -> usize {
        while self.parents[idx] != idx {
            idx = self.parents[idx];
        }
        idx
    }

    /// Merges the sets containing `data1` and `data2`.
    ///
    /// Returns `None` if either value has not been registered, `Some(())` otherwise (including
    /// when both values already share a set).
    #[must_use]
    pub fn union(&mut self, data1: &T, data2: &T) -> Option<()> {
        let idx1 = *self.data_idx.get(data1)?;
        let idx2 = *self.data_idx.get(data2)?;
        let parent1 = self.find(idx1);
        let parent2 = self.find(idx2);
        if parent1 != parent2 {
            self.parents[parent1] = parent2;
        }
        Some(())
    }

    /// Reports whether `data1` and `data2` belong to the same set.
    ///
    /// Returns `None` if either value has not been registered.
    pub fn same_set(&self, data1: &T, data2: &T) -> Option<bool> {
        let idx1 = *self.data_idx.get(data1)?;
        let idx2 = *self.data_idx.get(data2)?;
        Some(self.find_const(idx1) == self.find_const(idx2))
    }

    /// Returns the number of values in the set containing `data`, or `None` if `data` has not
    /// been registered.
    ///
    /// This walks every element's parent chain, so it is linear in the number of registered
    /// values at best.
    pub fn set_size(&self, data: &T) -> Option<usize> {
        let idx = *self.data_idx.get(data)?;
        let root = self.find_const(idx);
        Some(
            (0..self.parents.len())
                .filter(|&member| self.find_const(member) == root)
                .count(),
        )
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn test_union_find() {
        let mut set = DisjointSets::new();
        set.make_set("a").unwrap();
        set.make_set("b").unwrap();
        set.make_set("c").unwrap();
        set.make_set("d").unwrap();
        set.make_set("e").unwrap();
        assert!(set.same_set(&"a", &"a").unwrap());
        assert!(!set.same_set(&"a", &"b").unwrap());
        assert_eq!(set.set_size(&"a").unwrap(), 1);
        assert_eq!(set.set_size(&"c").unwrap(), 1);
        set.union(&"a", &"b").unwrap();
        assert_eq!(set.set_size(&"a").unwrap(), 2);
        assert_eq!(set.set_size(&"c").unwrap(), 1);
        assert!(set.same_set(&"a", &"b").unwrap());
        assert!(!set.same_set(&"a", &"c").unwrap());
        set.union(&"b", &"c").unwrap();
        assert!(set.same_set(&"a", &"c").unwrap());
        assert!(!set.same_set(&"a", &"d").unwrap());
        assert_eq!(set.set_size(&"a").unwrap(), 3);
        assert_eq!(set.set_size(&"d").unwrap(), 1);
        set.union(&"d", &"e").unwrap();
        assert!(set.same_set(&"d", &"e").unwrap());
        assert!(!set.same_set(&"a", &"d").unwrap());
        assert_eq!(set.set_size(&"a").unwrap(), 3);
        assert_eq!(set.set_size(&"d").unwrap(), 2);
        set.union(&"c", &"e").unwrap();
        assert!(set.same_set(&"a", &"e").unwrap());
        assert_eq!(set.set_size(&"d").unwrap(), 5);
    }
}
It runs 4 | # several checks: 5 | # - fmt: checks that the code is formatted according to rustfmt 6 | # - clippy: checks that the code does not contain any clippy warnings 7 | # - doc: checks that the code can be documented without errors 8 | # - hack: check combinations of feature flags 9 | # - msrv: check that the msrv specified in the crate is correct 10 | permissions: 11 | contents: read 12 | # This configuration allows maintainers of this repo to create a branch and pull request based on 13 | # the new branch. Restricting the push trigger to the main branch ensures that the PR only gets 14 | # built once. 15 | on: 16 | push: 17 | branches: [main] 18 | pull_request: 19 | # If new code is pushed to a PR branch, then cancel in progress workflows for that PR. Ensures that 20 | # we don't waste CI time, and returns results quicker https://github.com/jonhoo/rust-ci-conf/pull/5 21 | concurrency: 22 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 23 | cancel-in-progress: true 24 | defaults: 25 | run: 26 | working-directory: ./optd-persistent 27 | name: check 28 | jobs: 29 | fmt: 30 | runs-on: ubuntu-latest 31 | name: stable / fmt 32 | steps: 33 | - uses: actions/checkout@v4 34 | with: 35 | submodules: true 36 | - name: Install stable 37 | uses: dtolnay/rust-toolchain@stable 38 | with: 39 | components: rustfmt 40 | - name: cargo fmt --check 41 | run: cargo fmt --check 42 | clippy: 43 | runs-on: ubuntu-latest 44 | name: ${{ matrix.toolchain }} / clippy 45 | permissions: 46 | contents: read 47 | checks: write 48 | strategy: 49 | fail-fast: false 50 | matrix: 51 | # Get early warning of new lints which are regularly introduced in beta channels. 
52 | toolchain: [stable, beta] 53 | steps: 54 | - uses: actions/checkout@v4 55 | with: 56 | submodules: true 57 | - name: Install ${{ matrix.toolchain }} 58 | uses: dtolnay/rust-toolchain@master 59 | with: 60 | toolchain: ${{ matrix.toolchain }} 61 | components: clippy 62 | - name: cargo clippy 63 | run: cargo clippy --locked --all-targets --all-features -- -D warnings 64 | doc: 65 | # run docs generation on nightly rather than stable. This enables features like 66 | # https://doc.rust-lang.org/beta/unstable-book/language-features/doc-cfg.html which allows an 67 | # API be documented as only available in some specific platforms. 68 | runs-on: ubuntu-latest 69 | name: nightly / doc 70 | steps: 71 | - uses: actions/checkout@v4 72 | with: 73 | submodules: true 74 | - name: Install nightly 75 | uses: dtolnay/rust-toolchain@nightly 76 | - name: Install cargo-docs-rs 77 | uses: dtolnay/install@cargo-docs-rs 78 | - name: cargo docs-rs 79 | run: cargo docs-rs 80 | hack: 81 | # cargo-hack checks combinations of feature flags to ensure that features are all additive 82 | # which is required for feature unification 83 | runs-on: ubuntu-latest 84 | name: ubuntu / stable / features 85 | steps: 86 | - uses: actions/checkout@v4 87 | with: 88 | submodules: true 89 | - name: Install stable 90 | uses: dtolnay/rust-toolchain@stable 91 | - name: cargo install cargo-hack 92 | uses: taiki-e/install-action@cargo-hack 93 | # intentionally no target specifier; see https://github.com/jonhoo/rust-ci-conf/pull/4 94 | # --feature-powerset runs for every combination of features 95 | - name: cargo hack 96 | run: cargo hack --feature-powerset check 97 | msrv: 98 | # check that we can build using the minimal rust version that is specified by this crate 99 | runs-on: ubuntu-latest 100 | # we use a matrix here just because env can't be used in job names 101 | # https://docs.github.com/en/actions/learn-github-actions/contexts#context-availability 102 | strategy: 103 | matrix: 104 | msrv: ["1.81.0"] # 
Start with first version that supports error_in_core 105 | name: ubuntu / ${{ matrix.msrv }} 106 | steps: 107 | - uses: actions/checkout@v4 108 | with: 109 | submodules: true 110 | - name: Install ${{ matrix.msrv }} 111 | uses: dtolnay/rust-toolchain@master 112 | with: 113 | toolchain: ${{ matrix.msrv }} 114 | - name: cargo +${{ matrix.msrv }} check 115 | run: cargo check 116 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_group_winner.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing the best physical plan (or "winner") of a Cascades group. 2 | //! 3 | //! In the Cascades framework, query optimization is done through dynamic programming that is based 4 | //! on the assumption that the cost model satisfies the _principle of optimality_. Quoted from the 5 | //! Microsoft article _Extensible query optimizers in practice_: 6 | //! 7 | //! > ... in the search space of linear sequence of joins, the optimal plan for a join of n 8 | //! > relations can be found by extending the optimal plan of a sub-expression of n - 1 joins with 9 | //! > an additional join. 10 | //! 11 | //! By storing the best sub-plans / [`physical_expression`]s of smaller Cascades groups, we can 12 | //! build up an optimal query plan. 13 | //! 14 | //! This entity represents the best plan sub-tree for a specific group. However, we store multiple 15 | //! winners over different epochs, as changes to the database may require us to re-evaluate what the 16 | //! optimal sub-plan is. 17 | //! 18 | //! # Columns 19 | //! 20 | //! Other than the primary key, all of the columns in this relation are foreign keys to other 21 | //! tables. 22 | //! 23 | //! A group winner is defined by the [`cascades_group`] it belongs to (`group_id`), the unique ID of 24 | //! the [`physical_expression`] (`physical_expression_id`), the ID of the cost record in the 25 | //! 
[`plan_cost`] table (`cost_id`), and the monotonically-increasing epoch ID in the [`event`] 26 | //! table (`epoch_id`). 27 | //! 28 | //! [`cascades_group`]: super::cascades_group 29 | //! [`physical_expression`]: super::physical_expression 30 | //! [`plan_cost`]: super::super::cost_model::plan_cost 31 | //! [`event`]: super::super::cost_model::event 32 | 33 | use crate::migrator::cost_model::{event::Event, plan_cost::PlanCost}; 34 | use crate::migrator::memo::{ 35 | cascades_group::CascadesGroup, physical_expression::PhysicalExpression, 36 | }; 37 | use sea_orm_migration::{prelude::*, schema::*}; 38 | 39 | #[derive(Iden)] 40 | pub enum GroupWinner { 41 | Table, 42 | Id, 43 | GroupId, 44 | PhysicalExpressionId, 45 | CostId, 46 | EpochId, 47 | } 48 | 49 | #[derive(DeriveMigrationName)] 50 | pub struct Migration; 51 | 52 | #[async_trait::async_trait] 53 | impl MigrationTrait for Migration { 54 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 55 | manager 56 | .create_table( 57 | Table::create() 58 | .table(GroupWinner::Table) 59 | .if_not_exists() 60 | .col(pk_auto(GroupWinner::Id)) 61 | .col(integer(GroupWinner::GroupId)) 62 | .foreign_key( 63 | ForeignKey::create() 64 | .from(GroupWinner::Table, GroupWinner::GroupId) 65 | .to(CascadesGroup::Table, CascadesGroup::Id) 66 | .on_delete(ForeignKeyAction::Cascade) 67 | .on_update(ForeignKeyAction::Cascade), 68 | ) 69 | .col(integer(GroupWinner::PhysicalExpressionId)) 70 | .foreign_key( 71 | ForeignKey::create() 72 | .from(GroupWinner::Table, GroupWinner::PhysicalExpressionId) 73 | .to(PhysicalExpression::Table, PhysicalExpression::Id) 74 | .on_delete(ForeignKeyAction::Cascade) 75 | .on_update(ForeignKeyAction::Cascade), 76 | ) 77 | .col(integer(GroupWinner::CostId)) 78 | .foreign_key( 79 | ForeignKey::create() 80 | .from(GroupWinner::Table, GroupWinner::CostId) 81 | .to(PlanCost::Table, PlanCost::Id) 82 | .on_delete(ForeignKeyAction::Cascade) 83 | .on_update(ForeignKeyAction::Cascade), 84 | ) 85 | 
.col(integer(GroupWinner::EpochId)) 86 | .foreign_key( 87 | ForeignKey::create() 88 | .from(GroupWinner::Table, GroupWinner::EpochId) 89 | .to(Event::Table, Event::EpochId) 90 | .on_delete(ForeignKeyAction::Cascade) 91 | .on_update(ForeignKeyAction::Cascade), 92 | ) 93 | .to_owned(), 94 | ) 95 | .await 96 | } 97 | 98 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 99 | manager 100 | .drop_table(Table::drop().table(GroupWinner::Table).to_owned()) 101 | .await 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /optd-persistent/src/cost_model/catalog/mock_catalog.rs: -------------------------------------------------------------------------------- 1 | use sea_orm::prelude::Json; 2 | use serde_json::json; 3 | 4 | use crate::cost_model::interface::{AttrType, IndexType, StatType}; 5 | 6 | /// TODO: documentation 7 | pub struct MockDatabaseMetadata { 8 | pub id: i32, 9 | pub name: String, 10 | } 11 | 12 | pub struct MockNamespaceMetadata { 13 | pub id: i32, 14 | pub name: String, 15 | pub database_id: i32, 16 | } 17 | 18 | pub struct MockTableMetadata { 19 | pub id: i32, 20 | pub name: String, 21 | pub namespace_id: i32, 22 | } 23 | 24 | pub struct MockAttribute { 25 | pub id: i32, 26 | pub name: String, 27 | pub attr_index: i32, 28 | pub table_id: i32, 29 | pub compression_method: char, 30 | pub attr_type: i32, 31 | pub is_not_null: bool, 32 | } 33 | 34 | pub struct MockStatistic { 35 | pub id: i32, 36 | pub stat_type: i32, 37 | pub stat_value: Json, 38 | pub attr_ids: Vec, 39 | pub table_id: Option, 40 | pub name: String, 41 | } 42 | 43 | pub struct MockIndex { 44 | pub id: i32, 45 | pub name: String, 46 | pub table_id: i32, 47 | pub number_of_attributes: i32, 48 | pub index_type: i32, 49 | pub is_unique: bool, 50 | pub nulls_not_distinct: bool, 51 | pub is_primary: bool, 52 | pub is_clustered: bool, 53 | pub is_exclusion: bool, 54 | pub attr_ids: Vec, 55 | } 56 | 57 | pub struct MockTrigger { 58 
| pub id: i32, 59 | pub name: String, 60 | pub table_id: i32, 61 | pub parent_trigger_id: i32, 62 | pub function: String, 63 | } 64 | 65 | /// TODO: documentation 66 | #[derive(Default)] 67 | pub struct MockCatalog { 68 | pub databases: Vec, 69 | pub namespaces: Vec, 70 | pub tables: Vec, 71 | pub attributes: Vec, 72 | pub statistics: Vec, 73 | pub indexes: Vec, 74 | pub triggers: Vec, 75 | // TODO: constraints 76 | } 77 | 78 | impl MockCatalog { 79 | /// TODO: documentation 80 | pub fn new() -> Self { 81 | let databases: Vec = vec![MockDatabaseMetadata { 82 | id: 1, 83 | name: "db1".to_string(), 84 | }]; 85 | let namespaces: Vec = vec![MockNamespaceMetadata { 86 | id: 1, 87 | name: "ns1".to_string(), 88 | database_id: 1, 89 | }]; 90 | let tables: Vec = vec![MockTableMetadata { 91 | id: 1, 92 | name: "table1".to_string(), 93 | namespace_id: 1, 94 | }]; 95 | let attributes: Vec = vec![ 96 | MockAttribute { 97 | id: 1, 98 | name: "attr1".to_string(), 99 | attr_index: 1, 100 | table_id: 1, 101 | compression_method: 'n', 102 | attr_type: AttrType::Integer as i32, 103 | is_not_null: true, 104 | }, 105 | MockAttribute { 106 | id: 2, 107 | name: "attr2".to_string(), 108 | attr_index: 2, 109 | table_id: 1, 110 | compression_method: 'n', 111 | attr_type: AttrType::Integer as i32, 112 | is_not_null: false, 113 | }, 114 | ]; 115 | let statistics: Vec = vec![ 116 | MockStatistic { 117 | id: 1, 118 | stat_type: StatType::NonNullCount as i32, 119 | stat_value: json!(100), 120 | attr_ids: vec![1], 121 | table_id: None, 122 | name: "CountAttr1".to_string(), 123 | }, 124 | MockStatistic { 125 | id: 2, 126 | stat_type: StatType::NonNullCount as i32, 127 | stat_value: json!(200), 128 | attr_ids: vec![2], 129 | table_id: None, 130 | name: "CountAttr2".to_string(), 131 | }, 132 | MockStatistic { 133 | id: 3, 134 | stat_type: StatType::TableRowCount as i32, 135 | stat_value: json!(300), 136 | attr_ids: vec![], 137 | table_id: Some(1), 138 | name: "Table1Count".to_string(), 139 | }, 140 
#![allow(dead_code, unused_imports)]

// NOTE(review): throughout this file the generic type arguments (e.g. on `StorageResult`,
// `Vec` and `Option`) are missing from the text as shown and cannot be recovered from this
// file alone — restore them from the repository before compiling.

use crate::entities::cascades_group;
use crate::entities::logical_expression;
use crate::entities::physical_expression;
use crate::StorageResult;
use num_enum::{IntoPrimitive, TryFromPrimitive};
use sea_orm::prelude::Json;
use sea_orm::*;
use sea_orm_migration::prelude::*;
use serde_json::json;
use std::sync::Arc;

// Identifier aliases; all of them are plain `i32`s, so the compiler does not tell them apart.
pub type GroupId = i32;
pub type TableId = i32;
pub type AttrId = i32;
pub type ExprId = i32;
pub type EpochId = i32;
pub type StatId = i32;
pub type AttrIndex = i32;

/// Selects where catalog information is taken from; passed to
/// `CostModelStorageLayer::update_stats_from_catalog`.
pub enum CatalogSource {
    Iceberg(),
    Mock,
}

/// Attribute data types, represented as `i32` with discriminants starting at 1
/// (`Integer` = 1, `Float` = 2, `Varchar` = 3, `Boolean` = 4).
#[repr(i32)]
#[derive(Copy, Clone, Debug, PartialEq, IntoPrimitive, TryFromPrimitive)]
pub enum AttrType {
    Integer = 1,
    Float,
    Varchar,
    Boolean,
}

/// Kinds of index. No explicit discriminants are given, so `as i32` casts yield
/// `BTree` = 0 and `Hash` = 1.
pub enum IndexType {
    BTree,
    Hash,
}

/// Kinds of table constraint.
pub enum ConstraintType {
    PrimaryKey,
    ForeignKey,
    Unique,
    Check,
}

/// Kinds of statistic. No explicit discriminants are given, so `as i32` casts start at 0
/// with `TableRowCount`.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum StatType {
    /// The row count in a table. `TableRowCount` only applies to table statistics.
    TableRowCount,
    /// The number of non-null values in a column.
    NonNullCount,
    /// The number of distinct values in a column.
    Cardinality,
    /// The minimum value in a column.
    Min,
    /// The maximum value in a column.
    Max,
    /// The frequency of each value in a column.
    MostCommonValues,
    /// The distribution of values in a column.
    Distribution,
}

/// Chooses between an already existing epoch and a new one; passed to
/// `CostModelStorageLayer::update_stats`.
///
/// NOTE(review): the two strings of `New` presumably correspond to the `source` and `data`
/// parameters of `create_new_epoch` — confirm.
#[derive(PartialEq)]
pub enum EpochOption {
    // TODO(lanlou): Could I make i32 -> EpochId?
    Existed(i32),
    New(String, String),
}

/// A single statistic: its kind, its JSON value, the attributes and/or table it refers to, and
/// a name.
#[derive(Clone, Debug)]
pub struct Stat {
    pub stat_type: StatType,
    pub stat_value: Json,
    pub attr_ids: Vec,
    pub table_id: Option,
    pub name: String,
}

/// A cost value split into a compute component and an I/O component.
#[derive(Clone, Debug, PartialEq)]
pub struct Cost {
    pub compute_cost: f64,
    pub io_cost: f64,
}

/// Description of a table attribute, as returned by `CostModelStorageLayer::get_attribute`.
#[derive(Clone, Debug)]
pub struct Attr {
    pub table_id: i32,
    pub name: String,
    pub compression_method: String,
    pub attr_type: AttrType,
    pub base_index: i32,
    pub nullable: bool,
}

/// Asynchronous storage interface used by the cost model.
///
/// NOTE(review): only the `get_stats_*` methods are documented in the original; the purpose of
/// the remaining methods is not stated in this file and should be documented by someone who
/// knows the implementation.
#[trait_variant::make(Send)]
pub trait CostModelStorageLayer {
    async fn create_new_epoch(&self, source: String, data: String) -> StorageResult;

    async fn update_stats_from_catalog(&self, c: CatalogSource) -> StorageResult;

    async fn update_stats(
        &self,
        stat: Stat,
        epoch_option: EpochOption,
    ) -> StorageResult>;

    async fn store_cost(
        &self,
        expr_id: ExprId,
        cost: Option,
        estimated_statistic: Option,
        epoch_id: Option,
    ) -> StorageResult<()>;

    async fn store_expr_stats_mappings(
        &self,
        expr_id: ExprId,
        stat_ids: Vec,
    ) -> StorageResult<()>;

    /// Get the statistics for a given table.
    ///
    /// If `epoch_id` is None, it will return the latest statistics.
    async fn get_stats_for_table(
        &self,
        table_id: TableId,
        stat_type: StatType,
        epoch_id: Option,
    ) -> StorageResult>;

    /// Get the (joint) statistics for one or more attributes.
    ///
    /// If `epoch_id` is None, it will return the latest statistics.
    async fn get_stats_for_attr(
        &self,
        attr_ids: Vec,
        stat_type: StatType,
        epoch_id: Option,
    ) -> StorageResult>;

    /// Get the (joint) statistics for one or more attributes based on attribute base indices.
    ///
    /// If `epoch_id` is None, it will return the latest statistics.
    async fn get_stats_for_attr_indices_based(
        &self,
        table_id: TableId,
        attr_base_indices: Vec,
        stat_type: StatType,
        epoch_id: Option,
    ) -> StorageResult>;

    async fn get_cost_analysis(
        &self,
        expr_id: ExprId,
        epoch_id: EpochId,
    ) -> StorageResult<(Option, Option)>;

    async fn get_cost(&self, expr_id: ExprId) -> StorageResult<(Option, Option)>;

    async fn get_attribute(
        &self,
        table_id: TableId,
        attribute_base_index: AttrIndex,
    ) -> StorageResult>;
}
4 | # It runs the following jobs: 5 | # - required: runs the test suite on ubuntu with stable and beta rust toolchains 6 | # - minimal: runs the test suite with the minimal versions of the dependencies that satisfy the 7 | # requirements of this crate, and its dependencies 8 | # - os-check: runs the test suite on mac and windows 9 | # - coverage: runs the test suite and collects coverage information 10 | # See check.yml for information about how the concurrency cancellation and workflow triggering works 11 | permissions: 12 | contents: read 13 | on: 14 | push: 15 | branches: [main] 16 | pull_request: 17 | concurrency: 18 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 19 | cancel-in-progress: true 20 | defaults: 21 | run: 22 | working-directory: ./optd-persistent 23 | name: test 24 | jobs: 25 | required: 26 | runs-on: ubuntu-latest 27 | name: ubuntu / ${{ matrix.toolchain }} 28 | strategy: 29 | matrix: 30 | # run on stable and beta to ensure that tests won't break on the next version of the rust 31 | # toolchain 32 | toolchain: [stable, beta] 33 | steps: 34 | - uses: actions/checkout@v4 35 | with: 36 | submodules: true 37 | - name: Install ${{ matrix.toolchain }} 38 | uses: dtolnay/rust-toolchain@master 39 | with: 40 | toolchain: ${{ matrix.toolchain }} 41 | - name: cargo generate-lockfile 42 | # enable this ci template to run regardless of whether the lockfile is checked in or not 43 | if: hashFiles('Cargo.lock') == '' 44 | run: cargo generate-lockfile 45 | # https://twitter.com/jonhoo/status/1571290371124260865 46 | - name: cargo test --locked 47 | run: cargo test --locked --all-features --all-targets 48 | # https://github.com/rust-lang/cargo/issues/6669 49 | - name: cargo test --doc 50 | run: cargo test --locked --all-features --doc 51 | minimal: 52 | # This action chooses the oldest version of the dependencies permitted by Cargo.toml to ensure 53 | # that this crate is compatible with the minimal version that this crate and its 
dependencies 54 | # require. This will pick up issues where this crate relies on functionality that was introduced 55 | # later than the actual version specified (e.g., when we choose just a major version, but a 56 | # method was added after this version). 57 | # 58 | # This particular check can be difficult to get to succeed as often transitive dependencies may 59 | # be incorrectly specified (e.g., a dependency specifies 1.0 but really requires 1.1.5). There 60 | # is an alternative flag available -Zdirect-minimal-versions that uses the minimal versions for 61 | # direct dependencies of this crate, while selecting the maximal versions for the transitive 62 | # dependencies. Alternatively, you can add a line in your Cargo.toml to artificially increase 63 | # the minimal dependency, which you do with e.g.: 64 | # ```toml 65 | # # for minimal-versions 66 | # [target.'cfg(any())'.dependencies] 67 | # openssl = { version = "0.10.55", optional = true } # needed to allow foo to build with -Zminimal-versions 68 | # ``` 69 | # The optional = true is necessary in case that dependency isn't otherwise transitively required 70 | # by your library, and the target bit is so that this dependency edge never actually affects 71 | # Cargo build order. See also 72 | # https://github.com/jonhoo/fantoccini/blob/fde336472b712bc7ebf5b4e772023a7ba71b2262/Cargo.toml#L47-L49. 
73 | # This action is run on ubuntu with the stable toolchain, as it is not expected to fail 74 | runs-on: ubuntu-latest 75 | name: ubuntu / stable / minimal-versions 76 | steps: 77 | - uses: actions/checkout@v4 78 | with: 79 | submodules: true 80 | - name: Install stable 81 | uses: dtolnay/rust-toolchain@stable 82 | - name: Install nightly for -Zminimal-versions 83 | uses: dtolnay/rust-toolchain@nightly 84 | - name: rustup default stable 85 | run: rustup default stable 86 | - name: cargo update -Zminimal-versions 87 | run: cargo +nightly update -Zminimal-versions 88 | - name: cargo test 89 | run: cargo test --locked --all-features --all-targets 90 | os-check: 91 | # run cargo test on mac and windows 92 | runs-on: ${{ matrix.os }} 93 | name: ${{ matrix.os }} / stable 94 | strategy: 95 | fail-fast: false 96 | matrix: 97 | os: [macos-latest, windows-latest] 98 | steps: 99 | # if your project needs OpenSSL, uncomment this to fix Windows builds. 100 | # it's commented out by default as the install command takes 5-10m. 
101 | # - run: echo "VCPKG_ROOT=$env:VCPKG_INSTALLATION_ROOT" | Out-File -FilePath $env:GITHUB_ENV -Append 102 | # if: runner.os == 'Windows' 103 | # - run: vcpkg install openssl:x64-windows-static-md 104 | # if: runner.os == 'Windows' 105 | - uses: actions/checkout@v4 106 | with: 107 | submodules: true 108 | - name: Install stable 109 | uses: dtolnay/rust-toolchain@stable 110 | - name: cargo generate-lockfile 111 | if: hashFiles('Cargo.lock') == '' 112 | run: cargo generate-lockfile 113 | - name: cargo test 114 | run: cargo test --locked --all-features --all-targets 115 | -------------------------------------------------------------------------------- /optd-cost-model/src/cost/filter/in_list.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | common::{ 3 | nodes::{PredicateType, ReprPredicateNode}, 4 | predicates::{ 5 | attr_index_pred::AttrIndexPred, constant_pred::ConstantPred, in_list_pred::InListPred, 6 | }, 7 | properties::attr_ref::{AttrRef, BaseTableAttrRef}, 8 | types::GroupId, 9 | }, 10 | cost_model::CostModelImpl, 11 | stats::UNIMPLEMENTED_SEL, 12 | storage::CostModelStorageManager, 13 | CostModelResult, 14 | }; 15 | 16 | impl<S: CostModelStorageManager> CostModelImpl<S> { 17 | /// Estimates the selectivity of `attrA [NOT] IN (val1, val2, val3)`. Only supported when attrA is an 18 | /// attribute ref and val1, val2, val3 are constants; any unsupported shape yields `UNIMPLEMENTED_SEL`. 19 | pub(crate) async fn get_in_list_selectivity( 20 | &self, 21 | group_id: GroupId, 22 | expr: &InListPred, 23 | ) -> CostModelResult<f64> { 24 | let child = expr.child(); 25 | 26 | // Check child is an attribute ref. 27 | if !matches!(child.typ, PredicateType::AttrIndex) { 28 | return Ok(UNIMPLEMENTED_SEL); 29 | } 30 | 31 | // Check all expressions in the list are constants. 32 | let list_exprs = expr.list().to_vec(); 33 | if list_exprs 34 | .iter() 35 | .any(|expr| !matches!(expr.typ, PredicateType::Constant(_))) 36 | { 37 | return Ok(UNIMPLEMENTED_SEL); 38 | } 39 | 40 | // Convert child and const expressions to concrete types. 
41 | let attr_ref_pred = AttrIndexPred::from_pred_node(child).expect("we already checked the child is an attribute ref"); 42 | let attr_ref_idx = attr_ref_pred.attr_index(); 43 | 44 | let list_exprs = list_exprs 45 | .into_iter() 46 | .map(|expr| { 47 | ConstantPred::from_pred_node(expr) 48 | .expect("we already checked all list elements are constants") 49 | }) 50 | .collect::<Vec<_>>(); 51 | let negated = expr.negated(); 52 | 53 | if let AttrRef::BaseTableAttrRef(BaseTableAttrRef { table_id, attr_idx }) = 54 | self.memo.get_attribute_ref(group_id, attr_ref_idx) 55 | { 56 | let mut in_sel = 0.0; 57 | for expr in &list_exprs { 58 | let selectivity = self 59 | .get_attribute_equality_selectivity( 60 | table_id, 61 | attr_idx, 62 | &expr.value(), 63 | /* is_equality */ true, 64 | ) 65 | .await?; 66 | in_sel += selectivity; 67 | } 68 | in_sel = in_sel.min(1.0); // Clamp: the sum of the per-value estimates can exceed 1. 69 | if negated { 70 | Ok(1.0 - in_sel) 71 | } else { 72 | Ok(in_sel) 73 | } 74 | } else { 75 | // TODO: Child is a derived attribute. 76 | Ok(UNIMPLEMENTED_SEL) 77 | } 78 | } 79 | } 80 | 81 | #[cfg(test)] 82 | mod tests { 83 | use std::collections::HashMap; 84 | 85 | use crate::{ 86 | common::values::Value, 87 | stats::{utilities::simple_map::SimpleMap, MostCommonValues}, 88 | test_utils::tests::*, 89 | }; 90 | 91 | #[tokio::test] 92 | async fn test_in_list() { 93 | let per_attribute_stats = TestPerAttributeStats::new( 94 | MostCommonValues::SimpleFrequency(SimpleMap::new(vec![ 95 | (vec![Some(Value::Int32(1))], 0.8), 96 | (vec![Some(Value::Int32(2))], 0.2), 97 | ])), 98 | None, 99 | 2, 100 | 0.0, 101 | ); 102 | let cost_model = create_mock_cost_model( 103 | vec![TEST_TABLE1_ID], 104 | vec![HashMap::from([( 105 | TEST_ATTR1_BASE_INDEX, 106 | per_attribute_stats, 107 | )])], 108 | vec![None], 109 | ); 110 | 111 | assert_approx_eq::assert_approx_eq!( 112 | cost_model 113 | .get_in_list_selectivity(TEST_GROUP1_ID, &in_list(0, vec![Value::Int32(1)], false)) 114 | .await 115 | .unwrap(), 116 | 0.8 117 | ); 118 | assert_approx_eq::assert_approx_eq!( 119 | cost_model 120 | 
.get_in_list_selectivity( 121 | TEST_GROUP1_ID, 122 | &in_list(0, vec![Value::Int32(1), Value::Int32(2)], false) 123 | ) 124 | .await 125 | .unwrap(), 126 | 1.0 127 | ); 128 | assert_approx_eq::assert_approx_eq!( 129 | cost_model 130 | .get_in_list_selectivity(TEST_GROUP1_ID, &in_list(0, vec![Value::Int32(3)], false)) 131 | .await 132 | .unwrap(), 133 | 0.0 134 | ); 135 | assert_approx_eq::assert_approx_eq!( 136 | cost_model 137 | .get_in_list_selectivity(TEST_GROUP1_ID, &in_list(0, vec![Value::Int32(1)], true)) 138 | .await 139 | .unwrap(), 140 | 0.2 141 | ); 142 | assert_approx_eq::assert_approx_eq!( 143 | cost_model 144 | .get_in_list_selectivity( 145 | TEST_GROUP1_ID, 146 | &in_list(0, vec![Value::Int32(1), Value::Int32(2)], true) 147 | ) 148 | .await 149 | .unwrap(), 150 | 0.0 151 | ); 152 | assert_approx_eq::assert_approx_eq!( 153 | cost_model 154 | .get_in_list_selectivity(TEST_GROUP1_ID, &in_list(0, vec![Value::Int32(3)], true)) // TODO: Fix this 155 | .await 156 | .unwrap(), 157 | 1.0 158 | ); 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /optd-persistent/src/migrator/memo/m20241029_000001_cascades_group.rs: -------------------------------------------------------------------------------- 1 | //! An entity representing a group / equivalence class in the Cascades framework. 2 | //! 3 | //! Quoted from the Microsoft article _Extensible query optimizers in practice_: 4 | //! 5 | //! > In the memo, each class of equivalent expressions is called an equivalent class or a group, 6 | //! > and all equivalent expressions within the class are called group expressions or simply 7 | //! > expressions. 8 | //! 9 | //! A Cascades group is defined as a class of equivalent logical or physical expressions. The 10 | //! Cascades framework uses these groups as a way of storing the best query sub-plans for use in the 11 | //! dynamic programming search algorithm. 12 | //! 13 | //! 
For example, a Cascades group could be the set of expressions containing the logical expressions 14 | //! `Join(A, B)` and `Join(B, A)`, as well as the physical expressions `HashJoin(A, B)` and 15 | //! `NestedLoopJoin(B, A)`. 16 | //! 17 | //! # Columns 18 | //! 19 | //! Each group is assigned a monotonically-increasing (unique) ID. This ID will be important since 20 | //! there are many foreign key references from other tables to `cascades_group`. 21 | //! 22 | //! We additionally store a `latest_winner` foreign key reference to a physical expression. See 23 | //! the [section](#best-physical-plan-winner) below for more details. 24 | //! 25 | //! Finally, we store `in_progress` and `is_optimized` flags that are used for quickly determining 26 | //! the state of optimization for this group during the dynamic programming search. 27 | //! 28 | //! # Entity Relationships 29 | //! 30 | //! ### Child Expressions (Logical and Physical) 31 | //! 32 | //! To retrieve all of a `cascades_group`'s equivalent expressions, you must query the 33 | //! [`logical_expression`] or the [`physical_expression`] entities via their foreign keys to 34 | //! `cascades_group`. The relationship between [`logical_expression`] and `cascades_group` is 35 | //! many-to-one, and the exact same many-to-one relationship is held for [`physical_expression`] to 36 | //! `cascades_group`. 37 | //! 38 | //! ### Parent Expressions (Logical and Physical) 39 | //! 40 | //! Additionally, each logical or physical expression can have any number of `cascades_group`s as 41 | //! children, and a group can be a child of any expression. Thus, `cascades_group` additionally has 42 | //! a many-to-many relationship with [`logical_expression`] and [`physical_expression`] via the 43 | //! [`logical_children`] and [`physical_children`] entities. 44 | //! 45 | //! To reiterate, `cascades_group` has **both** a one-to-many **and** a many-to-many relationship 46 | //! 
with both [`logical_expression`] and [`physical_expression`]. This is due to groups being both 47 | //! parents and children of expressions. 48 | //! 49 | //! ### Best Physical Plan (Winner) 50 | //! 51 | //! The `cascades_group` entity also stores a `latest_winner` _nullable_ foreign key reference to 52 | //! a physical expression. This represents the most recent best query plan we have computed. The 53 | //! reason it is nullable is that we may not have come up with any best query plan yet. 54 | //! 55 | //! ### Logical Properties 56 | //! 57 | //! Lastly, each `cascades_group` record will have a set of logical properties stored in the 58 | //! [`logical_property`] entity, where there is a many-to-one relationship from 59 | //! [`logical_property`] to `cascades_group`. Note that we do not store physical properties directly 60 | //! on the `cascades_group`, but rather we store them for each [`physical_expression`] record. 61 | //! 62 | //! [`logical_expression`]: super::logical_expression 63 | //! [`physical_expression`]: super::physical_expression 64 | //! [`logical_children`]: super::logical_children 65 | //! [`physical_children`]: super::physical_children 66 | //! 
[`logical_property`]: super::logical_property 67 | 68 | use crate::migrator::memo::physical_expression::PhysicalExpression; 69 | use sea_orm_migration::{prelude::*, schema::*}; 70 | /// Table and column identifiers for the `cascades_group` table created by this migration. 71 | #[derive(DeriveIden)] 72 | pub enum CascadesGroup { 73 | Table, 74 | Id, 75 | LatestWinner, 76 | InProgress, 77 | IsOptimized, 78 | ParentId, 79 | } 80 | /// Migration that creates the `cascades_group` table in `up` and drops it in `down`. 81 | #[derive(DeriveMigrationName)] 82 | pub struct Migration; 83 | 84 | #[async_trait::async_trait] 85 | impl MigrationTrait for Migration { 86 | async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { 87 | manager 88 | .create_table( 89 | Table::create() 90 | .table(CascadesGroup::Table) 91 | .if_not_exists() 92 | .col(pk_auto(CascadesGroup::Id)) 93 | .col(integer_null(CascadesGroup::LatestWinner)) // Nullable; set to NULL when the referenced physical expression is deleted. 94 | .foreign_key( 95 | ForeignKey::create() 96 | .from(CascadesGroup::Table, CascadesGroup::LatestWinner) 97 | .to(PhysicalExpression::Table, PhysicalExpression::Id) 98 | .on_delete(ForeignKeyAction::SetNull) 99 | .on_update(ForeignKeyAction::Cascade), 100 | ) 101 | .col(boolean(CascadesGroup::InProgress)) 102 | .col(boolean(CascadesGroup::IsOptimized)) 103 | .col(integer_null(CascadesGroup::ParentId)) // Nullable self-reference to another group's `Id`. NOTE(review): its purpose is not described in the module docs; confirm. 104 | .foreign_key( 105 | ForeignKey::create() 106 | .from(CascadesGroup::Table, CascadesGroup::ParentId) 107 | .to(CascadesGroup::Table, CascadesGroup::Id) 108 | .on_delete(ForeignKeyAction::SetNull) 109 | .on_update(ForeignKeyAction::Cascade), 110 | ) 111 | .to_owned(), 112 | ) 113 | .await 114 | } 115 | /// Drops the table; `if_exists` mirrors `if_not_exists` in `up` so a rollback tolerates a missing table. 116 | async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { 117 | manager 118 | .drop_table(Table::drop().table(CascadesGroup::Table).if_exists().to_owned()) 119 | .await 120 | } 121 | } 122 | --------------------------------------------------------------------------------