├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── src ├── agg.rs ├── expr.rs ├── lib.rs ├── plan.rs └── value.rs ├── src_ans ├── agg.rs ├── expr.rs ├── lib.rs ├── plan.rs ├── schema.rs └── value.rs └── tests ├── 10_index_resolving.rs ├── 1_language.rs ├── 2_rewrite.rs ├── 3_conditional_rewrite.rs ├── 4_constant_folding.rs ├── 5_sql_plan.rs ├── 6_plan_elimination.rs ├── 7_predicate_pushdown.rs ├── 8_projection_pushdown.rs └── 9_agg_extraction.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "ahash" 7 | version = "0.7.6" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" 10 | dependencies = [ 11 | "getrandom", 12 | "once_cell", 13 | "version_check", 14 | ] 15 | 16 | [[package]] 17 | name = "autocfg" 18 | version = "1.1.0" 19 | source = "registry+https://github.com/rust-lang/crates.io-index" 20 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 21 | 22 | [[package]] 23 | name = "byteorder" 24 | version = "1.4.3" 25 | source = "registry+https://github.com/rust-lang/crates.io-index" 26 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 27 | 28 | [[package]] 29 | name = "cfg-if" 30 | version = "1.0.0" 31 | source = "registry+https://github.com/rust-lang/crates.io-index" 32 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 33 | 34 | [[package]] 35 | name = "egg" 36 | version = "0.9.2" 37 | source = "registry+https://github.com/rust-lang/crates.io-index" 38 | checksum = 
"4c6e969a475908119d4603393dfe05a17f676d9570c493c90763321aa950de2c" 39 | dependencies = [ 40 | "env_logger", 41 | "fxhash", 42 | "hashbrown", 43 | "indexmap", 44 | "instant", 45 | "log", 46 | "smallvec", 47 | "symbol_table", 48 | "symbolic_expressions", 49 | "thiserror", 50 | ] 51 | 52 | [[package]] 53 | name = "env_logger" 54 | version = "0.9.3" 55 | source = "registry+https://github.com/rust-lang/crates.io-index" 56 | checksum = "a12e6657c4c97ebab115a42dcee77225f7f482cdd841cf7088c657a42e9e00e7" 57 | dependencies = [ 58 | "log", 59 | ] 60 | 61 | [[package]] 62 | name = "fxhash" 63 | version = "0.2.1" 64 | source = "registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" 66 | dependencies = [ 67 | "byteorder", 68 | ] 69 | 70 | [[package]] 71 | name = "getrandom" 72 | version = "0.2.8" 73 | source = "registry+https://github.com/rust-lang/crates.io-index" 74 | checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" 75 | dependencies = [ 76 | "cfg-if", 77 | "libc", 78 | "wasi", 79 | ] 80 | 81 | [[package]] 82 | name = "hashbrown" 83 | version = "0.12.3" 84 | source = "registry+https://github.com/rust-lang/crates.io-index" 85 | checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" 86 | dependencies = [ 87 | "ahash", 88 | ] 89 | 90 | [[package]] 91 | name = "indexmap" 92 | version = "1.9.2" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399" 95 | dependencies = [ 96 | "autocfg", 97 | "hashbrown", 98 | ] 99 | 100 | [[package]] 101 | name = "instant" 102 | version = "0.1.12" 103 | source = "registry+https://github.com/rust-lang/crates.io-index" 104 | checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" 105 | dependencies = [ 106 | "cfg-if", 107 | ] 108 | 109 | [[package]] 110 | name = "libc" 111 | version = "0.2.139" 112 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 113 | checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79" 114 | 115 | [[package]] 116 | name = "log" 117 | version = "0.4.17" 118 | source = "registry+https://github.com/rust-lang/crates.io-index" 119 | checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" 120 | dependencies = [ 121 | "cfg-if", 122 | ] 123 | 124 | [[package]] 125 | name = "once_cell" 126 | version = "1.17.0" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66" 129 | 130 | [[package]] 131 | name = "proc-macro2" 132 | version = "1.0.51" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "5d727cae5b39d21da60fa540906919ad737832fe0b1c165da3a34d6548c849d6" 135 | dependencies = [ 136 | "unicode-ident", 137 | ] 138 | 139 | [[package]] 140 | name = "quote" 141 | version = "1.0.23" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b" 144 | dependencies = [ 145 | "proc-macro2", 146 | ] 147 | 148 | [[package]] 149 | name = "smallvec" 150 | version = "1.10.0" 151 | source = "registry+https://github.com/rust-lang/crates.io-index" 152 | checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" 153 | 154 | [[package]] 155 | name = "sql-optimizer-labs" 156 | version = "0.1.0" 157 | dependencies = [ 158 | "egg", 159 | ] 160 | 161 | [[package]] 162 | name = "symbol_table" 163 | version = "0.2.0" 164 | source = "registry+https://github.com/rust-lang/crates.io-index" 165 | checksum = "32bf088d1d7df2b2b6711b06da3471bc86677383c57b27251e18c56df8deac14" 166 | dependencies = [ 167 | "ahash", 168 | "hashbrown", 169 | ] 170 | 171 | [[package]] 172 | name = "symbolic_expressions" 173 | version = "5.0.3" 174 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "7c68d531d83ec6c531150584c42a4290911964d5f0d79132b193b67252a23b71" 176 | 177 | [[package]] 178 | name = "syn" 179 | version = "1.0.107" 180 | source = "registry+https://github.com/rust-lang/crates.io-index" 181 | checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5" 182 | dependencies = [ 183 | "proc-macro2", 184 | "quote", 185 | "unicode-ident", 186 | ] 187 | 188 | [[package]] 189 | name = "thiserror" 190 | version = "1.0.38" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" 193 | dependencies = [ 194 | "thiserror-impl", 195 | ] 196 | 197 | [[package]] 198 | name = "thiserror-impl" 199 | version = "1.0.38" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" 202 | dependencies = [ 203 | "proc-macro2", 204 | "quote", 205 | "syn", 206 | ] 207 | 208 | [[package]] 209 | name = "unicode-ident" 210 | version = "1.0.6" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc" 213 | 214 | [[package]] 215 | name = "version_check" 216 | version = "0.9.4" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 219 | 220 | [[package]] 221 | name = "wasi" 222 | version = "0.11.0+wasi-snapshot-preview1" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 225 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sql-optimizer-labs" 3 | 
version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | egg = "0.9" 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SQL Optimizer Labs 2 | 3 | Build a SQL optimizer in 1000 lines of Rust using [egg](https://egraphs-good.github.io). 4 | 5 | 🚧 Under construction 🚧 Stay tuned 👀 6 | 7 | ## Tasks 8 | 9 | Fill the code in `src` and pass the tests in `tests`! 10 | 11 | ```sh 12 | cargo test --test 1_language 13 | cargo test --test 2_rewrite 14 | cargo test --test 3_conditional_rewrite 15 | cargo test --test 4_constant_folding 16 | cargo test --test 5_sql_plan 17 | cargo test --test 6_plan_elimination 18 | cargo test --test 7_predicate_pushdown 19 | cargo test --test 8_projection_pushdown 20 | cargo test --test 9_agg_extraction 21 | cargo test --test 10_index_resolving 22 | ``` 23 | 24 | ## What's Next 25 | 26 | These labs are taken from the [RisingLight] project. 27 | [Check out] how it works in a real database system! 
28 | 29 | [RisingLight]: https://github.com/risinglightdb/risinglight 30 | [Check out]: https://github.com/risinglightdb/risinglight/blob/main/src/planner/mod.rs 31 | -------------------------------------------------------------------------------- /src/agg.rs: -------------------------------------------------------------------------------- 1 | use egg::Language; 2 | 3 | use super::*; 4 | 5 | #[derive(Debug, PartialEq, Eq)] 6 | pub enum Error { 7 | // #[error("aggregate function calls cannot be nested")] 8 | NestedAgg(String), 9 | // #[error("WHERE clause cannot contain aggregates")] 10 | AggInWhere, 11 | // #[error("GROUP BY clause cannot contain aggregates")] 12 | AggInGroupBy, 13 | // #[error("column {0} must appear in the GROUP BY clause or be used in an aggregate function")] 14 | ColumnNotInAgg(String), 15 | } 16 | 17 | /// Converts the SELECT statement into a plan tree. 18 | /// 19 | /// The nodes of all clauses have been added to the `egraph`. 20 | /// `from`, `where_`... are the ids of their root node. 21 | pub fn plan_select( 22 | egraph: &mut EGraph, 23 | from: Id, 24 | where_: Id, 25 | having: Id, 26 | groupby: Id, 27 | orderby: Id, 28 | projection: Id, 29 | ) -> Result { 30 | todo!() 31 | } 32 | -------------------------------------------------------------------------------- /src/expr.rs: -------------------------------------------------------------------------------- 1 | //! Expression simplification rules and constant folding. 2 | 3 | use egg::rewrite as rw; 4 | 5 | use super::*; 6 | 7 | /// Returns all rules of expression simplification. 
8 | #[rustfmt::skip] 9 | pub fn rules() -> Vec { vec![ 10 | rw!("add-zero"; "(+ ?a 0)" => "?a"), 11 | 12 | // TODO: add more rules 13 | ]} 14 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] 2 | 3 | use std::hash::Hash; 4 | 5 | use egg::{define_language, Analysis, DidMerge, Id}; 6 | 7 | pub mod agg; 8 | pub mod expr; 9 | pub mod plan; 10 | mod value; 11 | 12 | pub use value::*; 13 | 14 | pub type RecExpr = egg::RecExpr; 15 | pub type EGraph = egg::EGraph; 16 | pub type Rewrite = egg::Rewrite; 17 | 18 | define_language! { 19 | pub enum Expr { 20 | // values 21 | Constant(Value), // null, true, 1, 'hello' 22 | Column(Column), // t.a, b, c 23 | 24 | // TODO: add more nodes 25 | } 26 | } 27 | 28 | /// The unified analysis for all rules. 29 | #[derive(Default)] 30 | pub struct ExprAnalysis; 31 | 32 | /// The analysis data associated with each eclass. 33 | /// 34 | /// See [`egg::Analysis`] for how data is being processed. 35 | #[derive(Debug)] 36 | pub struct Data { 37 | // TODO: add analysis data 38 | } 39 | 40 | impl Analysis for ExprAnalysis { 41 | type Data = Data; 42 | 43 | /// Analyze a node and give the result. 44 | fn make(egraph: &EGraph, enode: &Expr) -> Self::Data { 45 | todo!() 46 | } 47 | 48 | /// Merge the analysis data with previous one. 49 | fn merge(&mut self, to: &mut Self::Data, from: Self::Data) -> DidMerge { 50 | todo!() 51 | } 52 | 53 | /// Modify the graph after analyzing a node. 54 | fn modify(egraph: &mut EGraph, id: Id) { 55 | todo!() 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/plan.rs: -------------------------------------------------------------------------------- 1 | //! Plan optimization rules. 
/// SQL value.
///
/// # Display and Parse Format
///
/// - Null: `null`
/// - Bool: `false`
/// - Integer: `1`
/// - String: `'string'`
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Value {
    Null,
    Bool(bool),
    Int(i32),
    String(String),
}

impl Display for Value {
    /// Writes the value in the format that [`FromStr`] accepts.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Value::Null => write!(f, "null"),
            Value::Bool(b) => write!(f, "{b}"),
            Value::Int(i) => write!(f, "{i}"),
            Value::String(s) => write!(f, "'{s}'"),
        }
    }
}

impl FromStr for Value {
    type Err = String;

    /// Parses `null`, a boolean, a 32-bit integer, or a single-quoted string.
    ///
    /// # Errors
    ///
    /// Returns the input text itself when it matches none of those forms.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s == "null" {
            return Ok(Value::Null);
        }
        if let Ok(b) = s.parse::<bool>() {
            return Ok(Value::Bool(b));
        }
        if let Ok(i) = s.parse::<i32>() {
            return Ok(Value::Int(i));
        }
        // Strip the opening quote first and the closing quote from what is left,
        // so a lone `'` is rejected instead of slicing `s[1..0]` and panicking.
        if let Some(inner) = s
            .strip_prefix('\'')
            .and_then(|rest| rest.strip_suffix('\''))
        {
            return Ok(Value::String(inner.to_string()));
        }
        Err(s.to_string())
    }
}
-------------------------------------------------------------------------------- /src_ans/agg.rs: -------------------------------------------------------------------------------- 1 | use egg::Language; 2 | 3 | use super::*; 4 | 5 | /// The data type of aggragation analysis. 6 | pub type AggSet = Vec; 7 | 8 | /// Returns all aggragations in the tree. 9 | /// 10 | /// Note: if there is an agg over agg, e.g. `sum(count(a))`, only the upper one will be returned. 11 | pub fn analyze_aggs(egraph: &EGraph, enode: &Expr) -> AggSet { 12 | use Expr::*; 13 | let x = |i: &Id| egraph[*i].data.aggs.clone(); 14 | match enode { 15 | Max(_) | Min(_) | Sum(_) | Avg(_) | Count(_) => vec![enode.clone()], 16 | // merge the set from all children 17 | Nested(_) | List(_) | Neg(_) | Not(_) | IsNull(_) | Add(_) | Sub(_) | Mul(_) | Div(_) 18 | | Eq(_) | NotEq(_) | Gt(_) | Lt(_) | GtEq(_) | LtEq(_) | And(_) | Or(_) | Xor(_) 19 | | Asc(_) | Desc(_) => enode.children().iter().flat_map(x).collect(), 20 | // ignore plan nodes 21 | _ => vec![], 22 | } 23 | } 24 | 25 | #[derive(Debug, PartialEq, Eq)] 26 | pub enum Error { 27 | // #[error("aggregate function calls cannot be nested")] 28 | NestedAgg(String), 29 | // #[error("WHERE clause cannot contain aggregates")] 30 | AggInWhere, 31 | // #[error("GROUP BY clause cannot contain aggregates")] 32 | AggInGroupBy, 33 | // #[error("column {0} must appear in the GROUP BY clause or be used in an aggregate function")] 34 | ColumnNotInAgg(String), 35 | } 36 | 37 | /// Converts the SELECT statement into a plan tree. 38 | /// 39 | /// The nodes of all clauses have been added to the `egraph`. 40 | /// `from`, `where_`... are the ids of their root node. 
41 | pub fn plan_select( 42 | egraph: &mut EGraph, 43 | from: Id, 44 | where_: Id, 45 | having: Id, 46 | groupby: Id, 47 | orderby: Id, 48 | projection: Id, 49 | ) -> Result { 50 | AggExtractor { egraph }.plan_select(from, where_, having, groupby, orderby, projection) 51 | } 52 | 53 | struct AggExtractor<'a> { 54 | egraph: &'a mut EGraph, 55 | } 56 | 57 | impl AggExtractor<'_> { 58 | fn aggs(&self, id: Id) -> &[Expr] { 59 | &self.egraph[id].data.aggs 60 | } 61 | 62 | fn node(&self, id: Id) -> &Expr { 63 | &self.egraph[id].nodes[0] 64 | } 65 | 66 | fn plan_select( 67 | &mut self, 68 | from: Id, 69 | where_: Id, 70 | having: Id, 71 | groupby: Id, 72 | orderby: Id, 73 | projection: Id, 74 | ) -> Result { 75 | if !self.aggs(where_).is_empty() { 76 | return Err(Error::AggInWhere); 77 | } 78 | if !self.aggs(groupby).is_empty() { 79 | return Err(Error::AggInGroupBy); 80 | } 81 | let mut plan = self.egraph.add(Expr::Filter([where_, from])); 82 | let mut to_rewrite = [projection, having, orderby]; 83 | plan = self.plan_agg(&mut to_rewrite, groupby, plan)?; 84 | let [projection, having, orderby] = to_rewrite; 85 | plan = self.egraph.add(Expr::Filter([having, plan])); 86 | plan = self.egraph.add(Expr::Order([orderby, plan])); 87 | plan = self.egraph.add(Expr::Proj([projection, plan])); 88 | Ok(plan) 89 | } 90 | 91 | /// Extracts all aggregations from `exprs` and generates an [`Agg`](Expr::Agg) plan. 92 | /// If no aggregation is found and no `groupby` keys, returns the original `plan`. 
93 | fn plan_agg(&mut self, exprs: &mut [Id], groupby: Id, plan: Id) -> Result { 94 | let expr_list = self.egraph.add(Expr::List(exprs.to_vec().into())); 95 | let aggs = self.aggs(expr_list).to_vec(); 96 | if aggs.is_empty() && self.node(groupby).as_list().is_empty() { 97 | return Ok(plan); 98 | } 99 | // check nested agg 100 | for agg in aggs.iter() { 101 | if agg 102 | .children() 103 | .iter() 104 | .any(|child| !self.aggs(*child).is_empty()) 105 | { 106 | return Err(Error::NestedAgg(agg.to_string())); 107 | } 108 | } 109 | let mut list: Vec<_> = aggs.into_iter().map(|agg| self.egraph.add(agg)).collect(); 110 | // make sure the order of the aggs is deterministic 111 | list.sort(); 112 | list.dedup(); 113 | let mut schema = list.clone(); 114 | schema.extend_from_slice(self.node(groupby).as_list()); 115 | let aggs = self.egraph.add(Expr::List(list.into())); 116 | let plan = self.egraph.add(Expr::Agg([aggs, groupby, plan])); 117 | // check for not aggregated columns 118 | // rewrite the expressions with a wrapper over agg or group keys 119 | for id in exprs { 120 | *id = self.rewrite_agg_in_expr(*id, &schema)?; 121 | } 122 | Ok(plan) 123 | } 124 | 125 | /// Rewrites the expression `id` with aggs wrapped in a [`Nested`](Expr::Nested) node. 126 | /// Returns the new expression. 
127 | /// 128 | /// # Example 129 | /// ```text 130 | /// id: (+ (sum a) (+ b 1)) 131 | /// schema: (sum a), (+ b 1) 132 | /// output: (+ (`(sum a)) (`(+ b 1))) 133 | /// 134 | /// so that `id` won't be optimized to: 135 | /// (+ b (+ (sum a) 1)) 136 | /// which can not be composed by `schema` 137 | /// ``` 138 | fn rewrite_agg_in_expr(&mut self, id: Id, schema: &[Id]) -> Result { 139 | let mut expr = self.node(id).clone(); 140 | if schema.contains(&id) { 141 | // found agg, wrap it with Nested 142 | return Ok(self.egraph.add(Expr::Nested(id))); 143 | } 144 | if let Expr::Column(cid) = &expr { 145 | return Err(Error::ColumnNotInAgg(cid.to_string())); 146 | } 147 | for child in expr.children_mut() { 148 | *child = self.rewrite_agg_in_expr(*child, schema)?; 149 | } 150 | Ok(self.egraph.add(expr)) 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src_ans/expr.rs: -------------------------------------------------------------------------------- 1 | //! Expression simplification rules and constant folding. 2 | 3 | use egg::{rewrite as rw, Subst, Var}; 4 | 5 | use super::*; 6 | 7 | /// Returns all rules of expression simplification. 
8 | #[rustfmt::skip] 9 | pub fn rules() -> Vec { vec![ 10 | rw!("add-zero"; "(+ ?a 0)" => "?a"), 11 | rw!("add-comm"; "(+ ?a ?b)" => "(+ ?b ?a)"), 12 | rw!("add-assoc"; "(+ ?a (+ ?b ?c))" => "(+ (+ ?a ?b) ?c)"), 13 | rw!("add-same"; "(+ ?a ?a)" => "(* ?a 2)"), 14 | rw!("add-neg"; "(+ ?a (- ?b))" => "(- ?a ?b)"), 15 | 16 | rw!("mul-zero"; "(* ?a 0)" => "0"), 17 | rw!("mul-one"; "(* ?a 1)" => "?a"), 18 | rw!("mul-minus"; "(* ?a -1)" => "(- ?a)"), 19 | rw!("mul-comm"; "(* ?a ?b)" => "(* ?b ?a)"), 20 | rw!("mul-assoc"; "(* ?a (* ?b ?c))" => "(* (* ?a ?b) ?c)"), 21 | 22 | rw!("neg-neg"; "(- (- ?a))" => "?a"), 23 | rw!("neg-sub"; "(- (- ?a ?b))" => "(- ?b ?a)"), 24 | 25 | rw!("sub-zero"; "(- ?a 0)" => "?a"), 26 | rw!("zero-sub"; "(- 0 ?a)" => "(- ?a)"), 27 | rw!("sub-cancel"; "(- ?a ?a)" => "0"), 28 | 29 | rw!("mul-add-distri"; "(* ?a (+ ?b ?c))" => "(+ (* ?a ?b) (* ?a ?c))"), 30 | rw!("mul-add-factor"; "(+ (* ?a ?b) (* ?a ?c))" => "(* ?a (+ ?b ?c))"), 31 | 32 | rw!("mul-div-cancel"; "(/ (* ?a ?b) ?b)" => "?a" if is_not_zero("?b")), 33 | 34 | rw!("eq-eq"; "(= ?a ?a)" => "true"), 35 | rw!("ne-eq"; "(<> ?a ?a)" => "false"), 36 | rw!("gt-eq"; "(> ?a ?a)" => "false"), 37 | rw!("lt-eq"; "(< ?a ?a)" => "false"), 38 | rw!("ge-eq"; "(>= ?a ?a)" => "true"), 39 | rw!("le-eq"; "(<= ?a ?a)" => "true"), 40 | rw!("eq-comm"; "(= ?a ?b)" => "(= ?b ?a)"), 41 | rw!("ne-comm"; "(<> ?a ?b)" => "(<> ?b ?a)"), 42 | rw!("gt-comm"; "(> ?a ?b)" => "(< ?b ?a)"), 43 | rw!("lt-comm"; "(< ?a ?b)" => "(> ?b ?a)"), 44 | rw!("ge-comm"; "(>= ?a ?b)" => "(<= ?b ?a)"), 45 | rw!("le-comm"; "(<= ?a ?b)" => "(>= ?b ?a)"), 46 | rw!("eq-add"; "(= (+ ?a ?b) ?c)" => "(= ?a (- ?c ?b))"), 47 | rw!("ne-add"; "(<> (+ ?a ?b) ?c)" => "(<> ?a (- ?c ?b))"), 48 | rw!("gt-add"; "(> (+ ?a ?b) ?c)" => "(> ?a (- ?c ?b))"), 49 | rw!("lt-add"; "(< (+ ?a ?b) ?c)" => "(< ?a (- ?c ?b))"), 50 | rw!("ge-add"; "(>= (+ ?a ?b) ?c)" => "(>= ?a (- ?c ?b))"), 51 | rw!("le-add"; "(<= (+ ?a ?b) ?c)" => "(<= ?a (- ?c ?b))"), 52 | 
rw!("eq-trans"; "(and (= ?a ?b) (= ?b ?c))" => "(and (= ?a ?b) (= ?a ?c))"), 53 | 54 | rw!("not-eq"; "(not (= ?a ?b))" => "(<> ?a ?b)"), 55 | rw!("not-ne"; "(not (<> ?a ?b))" => "(= ?a ?b)"), 56 | rw!("not-gt"; "(not (> ?a ?b))" => "(<= ?a ?b)"), 57 | rw!("not-ge"; "(not (>= ?a ?b))" => "(< ?a ?b)"), 58 | rw!("not-lt"; "(not (< ?a ?b))" => "(>= ?a ?b)"), 59 | rw!("not-le"; "(not (<= ?a ?b))" => "(> ?a ?b)"), 60 | rw!("not-and"; "(not (and ?a ?b))" => "(or (not ?a) (not ?b))"), 61 | rw!("not-or"; "(not (or ?a ?b))" => "(and (not ?a) (not ?b))"), 62 | rw!("not-not"; "(not (not ?a))" => "?a"), 63 | 64 | rw!("and-false"; "(and false ?a)" => "false"), 65 | rw!("and-true"; "(and true ?a)" => "?a"), 66 | rw!("and-null"; "(and null ?a)" => "null"), 67 | rw!("and-same"; "(and ?a ?a)" => "?a"), 68 | rw!("and-comm"; "(and ?a ?b)" => "(and ?b ?a)"), 69 | rw!("and-not"; "(and ?a (not ?a))" => "false"), 70 | rw!("and-assoc"; "(and ?a (and ?b ?c))" => "(and (and ?a ?b) ?c)"), 71 | 72 | rw!("or-false"; "(or false ?a)" => "?a"), 73 | rw!("or-true"; "(or true ?a)" => "true"), 74 | rw!("or-null"; "(or null ?a)" => "null"), 75 | rw!("or-same"; "(or ?a ?a)" => "?a"), 76 | rw!("or-comm"; "(or ?a ?b)" => "(or ?b ?a)"), 77 | rw!("or-not"; "(or ?a (not ?a))" => "true"), 78 | rw!("or-assoc"; "(or ?a (or ?b ?c))" => "(or (or ?a ?b) ?c)"), 79 | rw!("or-and"; "(or (and ?a ?b) (and ?a ?c))" => "(and ?a (or ?b ?c))"), 80 | 81 | rw!("xor-false"; "(xor false ?a)" => "?a"), 82 | rw!("xor-true"; "(xor true ?a)" => "(not ?a)"), 83 | rw!("xor-null"; "(xor null ?a)" => "null"), 84 | rw!("xor-same"; "(xor ?a ?a)" => "false"), 85 | rw!("xor-comm"; "(xor ?a ?b)" => "(xor ?b ?a)"), 86 | rw!("xor-not"; "(xor ?a (not ?a))" => "true"), 87 | rw!("xor-assoc"; "(xor ?a (xor ?b ?c))" => "(xor (xor ?a ?b) ?c)"), 88 | ]} 89 | 90 | /// The data type of constant analysis. 91 | /// 92 | /// `Some` for a known constant, `None` for unknown. 93 | pub type ConstValue = Option; 94 | 95 | /// Evaluate constant for a node. 
96 | pub fn eval_constant(egraph: &EGraph, enode: &Expr) -> ConstValue { 97 | use Expr::*; 98 | let x = |i: &Id| egraph[*i].data.constant.as_ref(); 99 | Some(match enode { 100 | Constant(v) => v.clone(), 101 | Column(_) => return None, 102 | List(_) => return None, 103 | Neg(a) => -x(a)?.clone(), 104 | Not(a) => !x(a)?.clone(), 105 | IsNull(a) => x(a)?.is_null().into(), 106 | Add([a, b]) => x(a)? + x(b)?, 107 | Sub([a, b]) => x(a)? - x(b)?, 108 | Mul([a, b]) => x(a)? * x(b)?, 109 | Div([a, b]) => { 110 | let xa = x(a)?; 111 | let xb = x(b)?; 112 | if xb.is_zero() { 113 | return None; 114 | } 115 | xa / xb 116 | } 117 | Eq([a, b]) => (x(a)? == x(b)?).into(), 118 | NotEq([a, b]) => (x(a)? != x(b)?).into(), 119 | Gt([a, b]) => (x(a)? > x(b)?).into(), 120 | Lt([a, b]) => (x(a)? < x(b)?).into(), 121 | GtEq([a, b]) => (x(a)? >= x(b)?).into(), 122 | LtEq([a, b]) => (x(a)? <= x(b)?).into(), 123 | And([a, b]) => x(a)?.and(x(b)?), 124 | Or([a, b]) => x(a)?.or(x(b)?), 125 | Xor([a, b]) => x(a)?.xor(x(b)?), 126 | Max(a) | Min(a) | Avg(a) => x(a)?.clone(), 127 | _ => return None, 128 | }) 129 | } 130 | 131 | /// Union `id` with a new constant node if it's constant. 132 | pub fn union_constant(egraph: &mut EGraph, id: Id) { 133 | if let Some(val) = &egraph[id].data.constant { 134 | let added = egraph.add(Expr::Constant(val.clone())); 135 | egraph.union(id, added); 136 | } 137 | } 138 | 139 | /// Returns true if the expression is a non-zero constant. 
140 | fn is_not_zero(var: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { 141 | value_is(var, |v| !v.is_zero()) 142 | } 143 | 144 | fn value_is(v: &str, f: impl Fn(&Value) -> bool) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { 145 | let v = v.parse::().unwrap(); 146 | move |egraph, _, subst| { 147 | if let Some(n) = &egraph[subst[v]].data.constant { 148 | f(n) 149 | } else { 150 | false 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /src_ans/lib.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hash; 2 | 3 | use egg::{define_language, Analysis, DidMerge, Id, Var}; 4 | 5 | pub mod agg; 6 | pub mod expr; 7 | pub mod plan; 8 | pub mod schema; 9 | mod value; 10 | 11 | pub use value::*; 12 | 13 | pub type RecExpr = egg::RecExpr; 14 | pub type EGraph = egg::EGraph; 15 | pub type Rewrite = egg::Rewrite; 16 | 17 | define_language! { 18 | pub enum Expr { 19 | // values 20 | Constant(Value), // null, true, 1, 'hello' 21 | ColumnIndex(ColumnIndex), // #0, #1, ... 22 | 23 | // utilities 24 | "`" = Nested(Id), // (` expr) a wrapper over expr to prevent optimization 25 | "list" = List(Box<[Id]>), // (list ...) 26 | 27 | // unary operations 28 | "-" = Neg(Id), 29 | "not" = Not(Id), 30 | "isnull" = IsNull(Id), 31 | 32 | // binary operations 33 | "+" = Add([Id; 2]), 34 | "-" = Sub([Id; 2]), 35 | "*" = Mul([Id; 2]), 36 | "/" = Div([Id; 2]), 37 | "=" = Eq([Id; 2]), 38 | "<>" = NotEq([Id; 2]), 39 | ">" = Gt([Id; 2]), 40 | "<" = Lt([Id; 2]), 41 | ">=" = GtEq([Id; 2]), 42 | "<=" = LtEq([Id; 2]), 43 | "and" = And([Id; 2]), 44 | "or" = Or([Id; 2]), 45 | "xor" = Xor([Id; 2]), 46 | 47 | // aggregations 48 | "max" = Max(Id), 49 | "min" = Min(Id), 50 | "sum" = Sum(Id), 51 | "avg" = Avg(Id), 52 | "count" = Count(Id), 53 | 54 | // plans 55 | "scan" = Scan([Id; 2]), // (scan table [column..]) 56 | "values" = Values(Box<[Id]>), // (values [expr..]..) 
57 | "proj" = Proj([Id; 2]), // (proj [expr..] child) 58 | "filter" = Filter([Id; 2]), // (filter expr child) 59 | "order" = Order([Id; 2]), // (order [order_key..] child) 60 | "asc" = Asc(Id), // (asc key) 61 | "desc" = Desc(Id), // (desc key) 62 | "limit" = Limit([Id; 3]), // (limit limit offset child) 63 | "topn" = TopN([Id; 4]), // (topn limit offset [order_key..] child) 64 | "join" = Join([Id; 4]), // (join join_type expr left right) 65 | "hashjoin" = HashJoin([Id; 5]), // (hashjoin join_type [left_expr..] [right_expr..] left right) 66 | "inner" = Inner, 67 | "left_outer" = LeftOuter, 68 | "right_outer" = RightOuter, 69 | "full_outer" = FullOuter, 70 | "agg" = Agg([Id; 3]), // (agg aggs=[expr..] group_keys=[expr..] child) 71 | // expressions must be agg 72 | // output = aggs || group_keys 73 | 74 | // internal functions 75 | "column-merge" = ColumnMerge([Id; 2]), // (column-merge list1 list2) 76 | // return a list of columns from list1 and list2 77 | "column-prune" = ColumnPrune([Id; 2]), // (column-prune filter list) 78 | // remove element from `list` whose column set is not a subset of `filter` 79 | "empty" = Empty(Id), // (empty child) 80 | // returns empty chunk 81 | // with the same schema as `child` 82 | 83 | Column(Column), // t.a, b, c 84 | } 85 | } 86 | 87 | impl Expr { 88 | fn as_list(&self) -> &[Id] { 89 | match self { 90 | Expr::List(l) => l, 91 | _ => panic!("expected list"), 92 | } 93 | } 94 | } 95 | 96 | trait ExprExt { 97 | fn as_list(&self) -> &[Id]; 98 | } 99 | 100 | impl ExprExt for egg::EClass { 101 | fn as_list(&self) -> &[Id] { 102 | self.iter() 103 | .find_map(|e| match e { 104 | Expr::List(list) => Some(list), 105 | _ => None, 106 | }) 107 | .expect("not list") 108 | } 109 | } 110 | 111 | /// The unified analysis for all rules. 112 | #[derive(Default)] 113 | pub struct ExprAnalysis; 114 | 115 | /// The analysis data associated with each eclass. 116 | /// 117 | /// See [`egg::Analysis`] for how data is being processed. 
118 | #[derive(Debug)] 119 | pub struct Data { 120 | /// Some if the expression is a constant. 121 | pub constant: expr::ConstValue, 122 | 123 | /// All columns involved in the node. 124 | pub columns: plan::ColumnSet, 125 | 126 | /// All aggragations in the tree. 127 | pub aggs: agg::AggSet, 128 | 129 | /// The schema for plan node: a list of expressions. 130 | /// 131 | /// For non-plan node, it is always None. 132 | /// For plan node, it may be None if the schema is unknown due to unresolved `prune`. 133 | pub schema: schema::Schema, 134 | } 135 | 136 | impl Analysis for ExprAnalysis { 137 | type Data = Data; 138 | 139 | /// Analyze a node and give the result. 140 | fn make(egraph: &EGraph, enode: &Expr) -> Self::Data { 141 | Data { 142 | constant: expr::eval_constant(egraph, enode), 143 | columns: plan::analyze_columns(egraph, enode), 144 | aggs: agg::analyze_aggs(egraph, enode), 145 | schema: schema::analyze_schema(egraph, enode), 146 | } 147 | } 148 | 149 | /// Merge the analysis data with previous one. 150 | /// 151 | /// This process makes the analysis data more accurate. 152 | /// 153 | /// For example, if we have an expr `a + 1 - a`, the constant analysis will give a result `None` 154 | /// since we are not sure if it is a constant or not. But after we applied a rule and turned 155 | /// it to `a - a + 1` -> `0 + 1`, we know it is a constant. Then in this function, we merge the 156 | /// new result `Some(1)` with the previous `None` and keep `Some(1)` as the final result. 157 | fn merge(&mut self, to: &mut Self::Data, from: Self::Data) -> DidMerge { 158 | let merge_const = egg::merge_max(&mut to.constant, from.constant); 159 | let merge_columns = plan::merge(&mut to.columns, from.columns); 160 | let merge_aggs = egg::merge_max(&mut to.aggs, from.aggs); 161 | let merge_schema = egg::merge_max(&mut to.schema, from.schema); 162 | merge_const | merge_columns | merge_aggs | merge_schema 163 | } 164 | 165 | /// Modify the graph after analyzing a node. 
166 | fn modify(egraph: &mut EGraph, id: Id) { 167 | expr::union_constant(egraph, id); 168 | } 169 | } 170 | 171 | /// Create a [`Var`] from string. 172 | /// 173 | /// This is a helper function for submodules. 174 | fn var(s: &str) -> Var { 175 | s.parse().expect("invalid variable") 176 | } 177 | -------------------------------------------------------------------------------- /src_ans/plan.rs: -------------------------------------------------------------------------------- 1 | //! Plan optimization rules. 2 | 3 | use std::collections::HashSet; 4 | 5 | use crate::schema::schema_is_eq; 6 | 7 | use super::*; 8 | use egg::{rewrite as rw, Applier, Language, Pattern, PatternAst, Subst, Symbol, Var}; 9 | 10 | /// Returns the rules that always improve the plan. 11 | pub fn rules() -> Vec { 12 | let mut rules = vec![]; 13 | rules.extend(cancel_rules()); 14 | rules.extend(merge_rules()); 15 | rules.extend(predicate_pushdown_rules()); 16 | rules.extend(projection_pushdown_rules()); 17 | rules.extend(join_rules()); 18 | rules 19 | } 20 | 21 | #[rustfmt::skip] 22 | fn cancel_rules() -> Vec { vec![ 23 | rw!("limit-null"; "(limit null 0 ?child)" => "?child"), 24 | rw!("limit-0"; "(limit 0 ?offset ?child)" => "(empty ?child)"), 25 | rw!("order-null"; "(order (list) ?child)" => "?child"), 26 | rw!("filter-true"; "(filter true ?child)" => "?child"), 27 | rw!("filter-false"; "(filter false ?child)" => "(empty ?child)"), 28 | rw!("inner-join-false"; "(join inner false ?l ?r)" => "(empty (join inner false ?l ?r))"), 29 | 30 | rw!("proj-on-empty"; "(proj ?exprs (empty ?c))" => "(empty ?exprs)"), 31 | rw!("filter-on-empty"; "(filter ?cond (empty ?c))" => "(empty ?c)"), 32 | rw!("order-on-empty"; "(order ?keys (empty ?c))" => "(empty ?c)"), 33 | rw!("limit-on-empty"; "(limit ?limit ?offset (empty ?c))" => "(empty ?c)"), 34 | rw!("topn-on-empty"; "(topn ?limit ?offset ?keys (empty ?c))" => "(empty ?c)"), 35 | rw!("inner-join-on-left-empty"; "(join inner ?on (empty ?l) ?r)" => "(empty 
(join inner false ?l ?r))"), 36 | rw!("inner-join-on-right-empty"; "(join inner ?on ?l (empty ?r))" => "(empty (join inner false ?l ?r))"), 37 | ]} 38 | 39 | #[rustfmt::skip] 40 | fn merge_rules() -> Vec { vec![ 41 | rw!("limit-order-topn"; 42 | "(limit ?limit ?offset (order ?keys ?child))" => 43 | "(topn ?limit ?offset ?keys ?child)" 44 | ), 45 | rw!("filter-merge"; 46 | "(filter ?cond1 (filter ?cond2 ?child))" => 47 | "(filter (and ?cond1 ?cond2) ?child)" 48 | ), 49 | rw!("proj-merge"; 50 | "(proj ?exprs1 (proj ?exprs2 ?child))" => 51 | "(proj ?exprs1 ?child)" 52 | ), 53 | ]} 54 | 55 | #[rustfmt::skip] 56 | fn predicate_pushdown_rules() -> Vec { vec![ 57 | pushdown("filter", "?cond", "order", "?keys"), 58 | pushdown("filter", "?cond", "limit", "?limit ?offset"), 59 | pushdown("filter", "?cond", "topn", "?limit ?offset ?keys"), 60 | rw!("pushdown-filter-join"; 61 | "(filter ?cond (join inner ?on ?left ?right))" => 62 | "(join inner (and ?on ?cond) ?left ?right)" 63 | ), 64 | rw!("pushdown-filter-join-left"; 65 | "(join inner (and ?cond1 ?cond2) ?left ?right)" => 66 | "(join inner ?cond2 (filter ?cond1 ?left) ?right)" 67 | if columns_is_subset("?cond1", "?left") 68 | ), 69 | rw!("pushdown-filter-join-left-1"; 70 | "(join inner ?cond1 ?left ?right)" => 71 | "(join inner true (filter ?cond1 ?left) ?right)" 72 | if columns_is_subset("?cond1", "?left") 73 | ), 74 | rw!("pushdown-filter-join-right"; 75 | "(join inner (and ?cond1 ?cond2) ?left ?right)" => 76 | "(join inner ?cond2 ?left (filter ?cond1 ?right))" 77 | if columns_is_subset("?cond1", "?right") 78 | ), 79 | rw!("pushdown-filter-join-right-1"; 80 | "(join inner ?cond1 ?left ?right)" => 81 | "(join inner true ?left (filter ?cond1 ?right))" 82 | if columns_is_subset("?cond1", "?right") 83 | ), 84 | ]} 85 | 86 | /// Returns a rule to pushdown plan `a` through `b`. 
87 | fn pushdown(a: &str, a_args: &str, b: &str, b_args: &str) -> Rewrite { 88 | let name = format!("pushdown-{a}-{b}"); 89 | let searcher = format!("({a} {a_args} ({b} {b_args} ?child))") 90 | .parse::>() 91 | .unwrap(); 92 | let applier = format!("({b} {b_args} ({a} {a_args} ?child))") 93 | .parse::>() 94 | .unwrap(); 95 | Rewrite::new(name, searcher, applier).unwrap() 96 | } 97 | 98 | #[rustfmt::skip] 99 | pub fn join_rules() -> Vec { vec![ 100 | // we only have right rotation rule, 101 | // because the initial state is always a left-deep tree 102 | // thus left rotation is not needed. 103 | rw!("join-reorder"; 104 | "(join ?type ?cond2 (join ?type ?cond1 ?left ?mid) ?right)" => 105 | "(join ?type ?cond1 ?left (join ?type ?cond2 ?mid ?right))" 106 | if columns_is_disjoint("?cond2", "?left") 107 | ), 108 | rw!("hash-join-on-one-eq"; 109 | "(join ?type (= ?el ?er) ?left ?right)" => 110 | "(hashjoin ?type (list ?el) (list ?er) ?left ?right)" 111 | if columns_is_subset("?el", "?left") 112 | if columns_is_subset("?er", "?right") 113 | ), 114 | rw!("hash-join-on-two-eq"; 115 | "(join ?type (and (= ?l1 ?r1) (= ?l2 ?r2)) ?left ?right)" => 116 | "(hashjoin ?type (list ?l1 ?l2) (list ?r1 ?r2) ?left ?right)" 117 | if columns_is_subset("?l1", "?left") 118 | if columns_is_subset("?l2", "?left") 119 | if columns_is_subset("?r1", "?right") 120 | if columns_is_subset("?r2", "?right") 121 | ), 122 | // TODO: support more than two equals 123 | ]} 124 | 125 | /// Returns true if the columns in `var1` are a subset of the columns in `var2`. 126 | fn columns_is_subset(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { 127 | columns_is(var1, var2, ColumnSet::is_subset) 128 | } 129 | 130 | /// Returns true if the columns in `var1` has no elements in common with the columns in `var2`. 
131 | fn columns_is_disjoint(var1: &str, var2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { 132 | columns_is(var1, var2, ColumnSet::is_disjoint) 133 | } 134 | 135 | fn columns_is( 136 | var1: &str, 137 | var2: &str, 138 | f: impl Fn(&ColumnSet, &ColumnSet) -> bool, 139 | ) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { 140 | let var1 = var(var1); 141 | let var2 = var(var2); 142 | move |egraph, _, subst| { 143 | let var1_set = &egraph[subst[var1]].data.columns; 144 | let var2_set = &egraph[subst[var2]].data.columns; 145 | f(var1_set, var2_set) 146 | } 147 | } 148 | 149 | /// The data type of column analysis. 150 | pub type ColumnSet = HashSet; 151 | 152 | /// Returns all columns involved in the node. 153 | pub fn analyze_columns(egraph: &EGraph, enode: &Expr) -> ColumnSet { 154 | use Expr::*; 155 | let x = |i: &Id| &egraph[*i].data.columns; 156 | match enode { 157 | Column(col) => [*col].into_iter().collect(), 158 | Proj([exprs, _]) => x(exprs).clone(), 159 | Agg([exprs, group_keys, _]) => x(exprs).union(x(group_keys)).cloned().collect(), 160 | ColumnPrune([filter, _]) => x(filter).clone(), // inaccurate 161 | _ => { 162 | // merge the columns from all children 163 | (enode.children().iter()) 164 | .flat_map(|id| x(id).iter().cloned()) 165 | .collect() 166 | } 167 | } 168 | } 169 | 170 | /// Merge two result set and keep the smaller one. 171 | pub fn merge(to: &mut ColumnSet, from: ColumnSet) -> DidMerge { 172 | if from.len() < to.len() { 173 | *to = from; 174 | DidMerge(true, false) 175 | } else { 176 | DidMerge(false, true) 177 | } 178 | } 179 | 180 | /// Pushdown projections and prune unused columns. 
181 | #[rustfmt::skip] 182 | pub fn projection_pushdown_rules() -> Vec { vec![ 183 | rw!("identical-proj"; 184 | "(proj ?exprs ?child)" => "?child" 185 | if schema_is_eq("?exprs", "?child") 186 | ), 187 | pushdown("proj", "?exprs", "limit", "?limit ?offset"), 188 | pushdown("limit", "?limit ?offset", "proj", "?exprs"), 189 | rw!("pushdown-proj-order"; 190 | "(proj ?exprs (order ?keys ?child))" => 191 | "(proj ?exprs (order ?keys (proj (column-merge ?exprs ?keys) ?child)))" 192 | ), 193 | rw!("pushdown-proj-topn"; 194 | "(proj ?exprs (topn ?limit ?offset ?keys ?child))" => 195 | "(proj ?exprs (topn ?limit ?offset ?keys (proj (column-merge ?exprs ?keys) ?child)))" 196 | ), 197 | rw!("pushdown-proj-filter"; 198 | "(proj ?exprs (filter ?cond ?child))" => 199 | "(proj ?exprs (filter ?cond (proj (column-merge ?exprs ?cond) ?child)))" 200 | ), 201 | rw!("pushdown-proj-agg"; 202 | "(agg ?aggs ?groupby ?child)" => 203 | "(agg ?aggs ?groupby (proj (column-merge ?aggs ?groupby) ?child))" 204 | ), 205 | rw!("pushdown-proj-join"; 206 | "(proj ?exprs (join ?type ?on ?left ?right))" => 207 | "(proj ?exprs (join ?type ?on 208 | (proj (column-prune ?left (column-merge ?exprs ?on)) ?left) 209 | (proj (column-prune ?right (column-merge ?exprs ?on)) ?right) 210 | ))" 211 | ), 212 | // column pruning 213 | rw!("pushdown-proj-scan"; 214 | "(proj ?exprs (scan ?table ?columns))" => 215 | "(proj ?exprs (scan ?table (column-prune ?exprs ?columns)))" 216 | ), 217 | // evaluate 'column-merge' and 'column-prune' 218 | rw!("column-merge"; 219 | "(column-merge ?list1 ?list2)" => 220 | { ColumnMerge { 221 | lists: [var("?list1"), var("?list2")], 222 | }} 223 | ), 224 | rw!("column-prune"; 225 | "(column-prune ?filter ?list)" => 226 | { ColumnPrune { 227 | filter: var("?filter"), 228 | list: var("?list"), 229 | }} 230 | if is_list("?list") 231 | ), 232 | ]} 233 | 234 | /// Return a list of columns from `lists`. 
235 | struct ColumnMerge { 236 | lists: [Var; 2], 237 | } 238 | 239 | impl Applier for ColumnMerge { 240 | fn apply_one( 241 | &self, 242 | egraph: &mut EGraph, 243 | eclass: Id, 244 | subst: &Subst, 245 | _searcher_ast: Option<&PatternAst>, 246 | _rule_name: Symbol, 247 | ) -> Vec { 248 | let list1 = &egraph[subst[self.lists[0]]].data.columns; 249 | let list2 = &egraph[subst[self.lists[1]]].data.columns; 250 | let mut list: Vec<&Column> = list1.union(list2).collect(); 251 | list.sort_unstable_by_key(|c| c.as_str()); 252 | let list = list 253 | .into_iter() 254 | .map(|col| egraph.lookup(Expr::Column(col.clone())).unwrap()) 255 | .collect(); 256 | let id = egraph.add(Expr::List(list)); 257 | 258 | // copied from `Pattern::apply_one` 259 | if egraph.union(eclass, id) { 260 | vec![eclass] 261 | } else { 262 | vec![] 263 | } 264 | } 265 | } 266 | 267 | /// Remove element from `list` whose column set is not a subset of `filter` 268 | struct ColumnPrune { 269 | filter: Var, 270 | list: Var, 271 | } 272 | 273 | impl Applier for ColumnPrune { 274 | fn apply_one( 275 | &self, 276 | egraph: &mut EGraph, 277 | eclass: Id, 278 | subst: &Subst, 279 | _searcher_ast: Option<&PatternAst>, 280 | _rule_name: Symbol, 281 | ) -> Vec { 282 | let columns = &egraph[subst[self.filter]].data.columns; 283 | let list = egraph[subst[self.list]].as_list(); 284 | let pruned = (list.iter().cloned()) 285 | .filter(|id| egraph[*id].data.columns.is_subset(columns)) 286 | .collect(); 287 | let id = egraph.add(Expr::List(pruned)); 288 | 289 | // copied from `Pattern::apply_one` 290 | if egraph.union(eclass, id) { 291 | vec![eclass] 292 | } else { 293 | vec![] 294 | } 295 | } 296 | } 297 | 298 | /// Returns true if the variable is a list. 
299 | fn is_list(v: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { 300 | let v = var(v); 301 | move |egraph, _, subst| { 302 | egraph[subst[v]] 303 | .iter() 304 | .any(|node| matches!(node, Expr::List(_))) 305 | } 306 | } 307 | -------------------------------------------------------------------------------- /src_ans/schema.rs: -------------------------------------------------------------------------------- 1 | //! Analyze schema and replace all column references with physical indices. 2 | //! 3 | //! This is the final step before executing. 4 | 5 | use egg::Subst; 6 | 7 | use super::*; 8 | 9 | /// The data type of schema analysis. 10 | pub type Schema = Option>; 11 | 12 | /// Returns the output expressions for plan node. 13 | pub fn analyze_schema(egraph: &EGraph, enode: &Expr) -> Schema { 14 | use Expr::*; 15 | let x = |i: &Id| egraph[*i].data.schema.clone(); 16 | let concat = |v1: Vec, v2: Vec| v1.into_iter().chain(v2.into_iter()).collect(); 17 | Some(match enode { 18 | // equal to child 19 | Filter([_, c]) | Order([_, c]) | Limit([_, _, c]) | TopN([_, _, _, c]) | Empty(c) => x(c)?, 20 | 21 | // concat 2 children 22 | Join([_, _, l, r]) | HashJoin([_, _, _, l, r]) => concat(x(l)?, x(r)?), 23 | 24 | // list is the source for the following nodes 25 | List(ids) => ids.to_vec(), 26 | 27 | // plans that change schema 28 | Scan([_, columns]) => x(columns)?, 29 | Values(vs) => vs.first().and_then(x)?, 30 | Proj([exprs, _]) => x(exprs)?, 31 | Agg([exprs, group_keys, _]) => concat(x(exprs)?, x(group_keys)?), 32 | 33 | // not plan node 34 | _ => return None, 35 | }) 36 | } 37 | 38 | /// Returns true if the schema of two nodes is equal. 
39 | pub fn schema_is_eq(v1: &str, v2: &str) -> impl Fn(&mut EGraph, Id, &Subst) -> bool { 40 | let v1 = var(v1); 41 | let v2 = var(v2); 42 | move |egraph, _, subst| { 43 | let s1 = &egraph[subst[v1]].data.schema; 44 | let s2 = &egraph[subst[v2]].data.schema; 45 | s1.is_some() && s1 == s2 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src_ans/value.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt::Display, 3 | ops::{Neg, Not}, 4 | str::FromStr, 5 | }; 6 | 7 | /// SQL value. 8 | /// 9 | /// # Display and Parse Format 10 | /// 11 | /// - Null: `null` 12 | /// - Bool: `false` 13 | /// - Integer: `1` 14 | /// - String: `'string'` 15 | #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] 16 | pub enum Value { 17 | Null, 18 | Bool(bool), 19 | Int(i32), 20 | String(String), 21 | } 22 | 23 | impl Display for Value { 24 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 25 | match self { 26 | Value::Null => write!(f, "null"), 27 | Value::Bool(b) => write!(f, "{b}"), 28 | Value::Int(i) => write!(f, "{i}"), 29 | Value::String(s) => write!(f, "'{s}'"), 30 | } 31 | } 32 | } 33 | 34 | impl FromStr for Value { 35 | type Err = String; 36 | 37 | fn from_str(s: &str) -> Result { 38 | if s == "null" { 39 | return Ok(Value::Null); 40 | } else if let Ok(i) = s.parse() { 41 | return Ok(Value::Bool(i)); 42 | } else if let Ok(i) = s.parse() { 43 | return Ok(Value::Int(i)); 44 | } else if s.starts_with('\'') && s.ends_with('\'') { 45 | return Ok(Value::String(s[1..s.len() - 1].to_string())); 46 | } 47 | Err(s.to_string()) 48 | } 49 | } 50 | 51 | impl From for Value { 52 | fn from(b: bool) -> Self { 53 | Value::Bool(b) 54 | } 55 | } 56 | 57 | impl Value { 58 | pub fn is_null(&self) -> bool { 59 | matches!(self, Value::Null) 60 | } 61 | 62 | pub fn is_zero(&self) -> bool { 63 | matches!(self, Value::Int(0)) 64 | } 65 | } 66 | 67 | macro_rules! 
impl_arith_for_value { 68 | ($Trait:ident, $name:ident) => { 69 | impl std::ops::$Trait for &Value { 70 | type Output = Value; 71 | 72 | fn $name(self, rhs: Self) -> Self::Output { 73 | use Value::*; 74 | match (self, rhs) { 75 | (Null, _) | (_, Null) => Null, 76 | (&Int(x), &Int(y)) => Int(x.$name(y)), 77 | _ => panic!( 78 | "invalid operation: {:?} {} {:?}", 79 | self, 80 | stringify!($name), 81 | rhs 82 | ), 83 | } 84 | } 85 | } 86 | 87 | impl std::ops::$Trait for Value { 88 | type Output = Value; 89 | fn $name(self, rhs: Self) -> Self::Output { 90 | (&self).$name(&rhs) 91 | } 92 | } 93 | }; 94 | } 95 | impl_arith_for_value!(Add, add); 96 | impl_arith_for_value!(Sub, sub); 97 | impl_arith_for_value!(Mul, mul); 98 | impl_arith_for_value!(Div, div); 99 | impl_arith_for_value!(Rem, rem); 100 | 101 | impl Neg for Value { 102 | type Output = Value; 103 | 104 | fn neg(self) -> Self::Output { 105 | use Value::*; 106 | match self { 107 | Null => Null, 108 | Int(i) => Int(-i), 109 | _ => panic!("invalid operation: -{:?}", self), 110 | } 111 | } 112 | } 113 | 114 | impl Value { 115 | pub fn and(&self, rhs: &Value) -> Value { 116 | use Value::*; 117 | match (self, rhs) { 118 | (Null, _) | (_, Null) => Null, 119 | (Bool(false), _) | (_, Bool(false)) => Bool(false), 120 | (&Bool(x), &Bool(y)) => Bool(x && y), 121 | _ => panic!("invalid operation: {:?} and {:?}", self, rhs), 122 | } 123 | } 124 | 125 | pub fn or(&self, rhs: &Value) -> Value { 126 | use Value::*; 127 | match (self, rhs) { 128 | (Null, _) | (_, Null) => Null, 129 | (Bool(true), _) | (_, Bool(true)) => Bool(true), 130 | (&Bool(x), &Bool(y)) => Bool(x || y), 131 | _ => panic!("invalid operation: {:?} or {:?}", self, rhs), 132 | } 133 | } 134 | 135 | pub fn xor(&self, rhs: &Value) -> Value { 136 | use Value::*; 137 | match (self, rhs) { 138 | (Null, _) | (_, Null) => Null, 139 | (&Bool(x), &Bool(y)) => Bool(x ^ y), 140 | _ => panic!("invalid operation: {:?} xor {:?}", self, rhs), 141 | } 142 | } 143 | } 144 | 145 
| impl Not for Value { 146 | type Output = Value; 147 | 148 | fn not(self) -> Self::Output { 149 | use Value::*; 150 | match self { 151 | Null => Null, 152 | Bool(b) => Bool(!b), 153 | _ => panic!("invalid operation: not {:?}", self), 154 | } 155 | } 156 | } 157 | 158 | pub type Column = egg::Symbol; 159 | 160 | /// The physical index to the column of the child plan. 161 | #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)] 162 | pub struct ColumnIndex(pub u32); 163 | 164 | impl Display for ColumnIndex { 165 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 166 | write!(f, "#{}", self.0) 167 | } 168 | } 169 | 170 | impl FromStr for ColumnIndex { 171 | type Err = String; 172 | 173 | fn from_str(s: &str) -> Result { 174 | let body = s 175 | .strip_prefix('#') 176 | .ok_or_else(|| "no leading #".to_string())?; 177 | let num = body.parse().map_err(|e| format!("invalid number: {e}"))?; 178 | Ok(Self(num)) 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /tests/10_index_resolving.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::plan::rules; 2 | 3 | egg::test_fn! 
{ 4 | identical_projection, 5 | rules(), 6 | "(proj (list a b) 7 | (scan t (list a b)))" => 8 | "(scan t (list a b))", 9 | } 10 | -------------------------------------------------------------------------------- /tests/1_language.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::{Expr, RecExpr, Value}; 2 | 3 | #[test] 4 | fn values() { 5 | assert_parse_value("null", Value::Null); 6 | assert_parse_value("true", Value::Bool(true)); 7 | assert_parse_value("1", Value::Int(1)); 8 | assert_parse_value("'string'", Value::String("string".into())); 9 | } 10 | 11 | #[test] 12 | fn columns() { 13 | assert_parse_expr("a"); 14 | assert_parse_expr("t.a"); 15 | } 16 | 17 | #[test] 18 | fn list() { 19 | assert_parse_expr("(list null 1 2)"); 20 | } 21 | 22 | #[test] 23 | fn operations() { 24 | assert_parse_expr("(isnull null)"); 25 | assert_parse_expr("(- a)"); 26 | assert_parse_expr("(+ a b)"); 27 | assert_parse_expr("(- a b)"); 28 | assert_parse_expr("(* a b)"); 29 | assert_parse_expr("(/ a b)"); 30 | assert_parse_expr("(= a b)"); 31 | assert_parse_expr("(<> a b)"); 32 | assert_parse_expr("(> a b)"); 33 | assert_parse_expr("(< a b)"); 34 | assert_parse_expr("(>= a b)"); 35 | assert_parse_expr("(<= a b)"); 36 | assert_parse_expr("(not a)"); 37 | assert_parse_expr("(and a b)"); 38 | assert_parse_expr("(or a b)"); 39 | assert_parse_expr("(xor a b)"); 40 | } 41 | 42 | #[track_caller] 43 | fn assert_parse_value(expr: &str, value: Value) { 44 | assert_eq!( 45 | expr.parse::().unwrap()[0.into()], 46 | Expr::Constant(value) 47 | ); 48 | } 49 | 50 | #[track_caller] 51 | fn assert_parse_expr(expr: &str) { 52 | assert_eq!(expr.parse::().unwrap().to_string(), expr); 53 | } 54 | -------------------------------------------------------------------------------- /tests/2_rewrite.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::expr::rules; 2 | 3 | egg::test_fn! 
{ 4 | add_sub, 5 | rules(), 6 | "(+ (- (- a 0)) (+ a b))" => "b", 7 | } 8 | 9 | egg::test_fn! { 10 | mul, 11 | rules(), 12 | "(+ (* (- b) a) (* b a))" => "0", 13 | } 14 | 15 | egg::test_fn! { 16 | cmp, 17 | rules(), 18 | "(> (+ a b) a)" => "(< 0 b)", 19 | } 20 | 21 | egg::test_fn! { 22 | boolean, 23 | rules(), 24 | "(and (xor a true) (or (and a b) (and (not b) a)))" => "false", 25 | } 26 | -------------------------------------------------------------------------------- /tests/3_conditional_rewrite.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::expr::rules; 2 | 3 | egg::test_fn! { 4 | #[should_panic] 5 | mul_div_0, 6 | rules(), 7 | "(/ (* a 0) 0)" => "a", 8 | } 9 | 10 | egg::test_fn! { 11 | mul_div, 12 | rules(), 13 | "(/ (* a 2) 2)" => "a", 14 | } 15 | -------------------------------------------------------------------------------- /tests/4_constant_folding.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::expr::rules; 2 | 3 | egg::test_fn! { 4 | arithmetic, 5 | rules(), 6 | "(- (+ 1 (- 2 (* 3 (/ 4 5)))))" => "-3", 7 | } 8 | 9 | egg::test_fn! { 10 | cmp, 11 | rules(), 12 | "(> 1 2)" => "false", 13 | } 14 | 15 | egg::test_fn! { 16 | null, 17 | rules(), 18 | "(isnull (- (+ 1 (- 2 (* 3 (/ 4 null))))))" => "true", 19 | } 20 | 21 | egg::test_fn! 
{ 22 | boolean, 23 | rules(), 24 | "(not (and (or null true) (xor (and false null) true)))" => "false", 25 | } 26 | -------------------------------------------------------------------------------- /tests/5_sql_plan.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::RecExpr; 2 | 3 | #[test] 4 | fn aggregations() { 5 | assert_parse_expr("(max a)"); 6 | assert_parse_expr("(min a)"); 7 | assert_parse_expr("(sum a)"); 8 | assert_parse_expr("(avg a)"); 9 | assert_parse_expr("(count a)"); 10 | } 11 | 12 | #[test] 13 | fn plans() { 14 | // SELECT a, b FROM t; 15 | assert_parse_expr("(scan t (list a b))"); 16 | 17 | // VALUES (false, 1), (true, 2); 18 | assert_parse_expr( 19 | "(values (list 20 | (list false 1) 21 | (list true 2) 22 | ))", 23 | ); 24 | 25 | let child = "(scan t (list a b))"; 26 | // SELECT a FROM t; 27 | assert_parse_expr(&format!("(proj (list a) {child})")); 28 | 29 | // SELECT max(a) FROM t GROUP BY b; 30 | assert_parse_expr(&format!("(agg (list (max a)) (list b) {child})")); 31 | 32 | // SELECT a, b FROM t WHERE a = 1; 33 | assert_parse_expr(&format!("(filter (= a 1) {child})")); 34 | 35 | // SELECT a, b FROM t ORDER BY a ASC, b DESC; 36 | assert_parse_expr(&format!("(order (list (asc a) (desc b)) {child})")); 37 | 38 | // SELECT a, b FROM t LIMIT 10 OFFSET 1; 39 | assert_parse_expr(&format!("(limit 10 1 {child})")); 40 | 41 | // SELECT a, b FROM t ORDER BY a ASC, b DESC LIMIT 10 OFFSET 1; 42 | assert_parse_expr(&format!("(topn 10 1 (list (asc a) (desc b)) {child})")); 43 | 44 | // SELECT a, b, c, d FROM t1, t2 WHERE a = c; 45 | for join_type in &["inner", "left_outer", "right_outer", "full_outer"] { 46 | assert_parse_expr(&format!( 47 | "(join {join_type} (list (= a c)) 48 | (scan t1 (list a b)) 49 | (scan t2 (list c d)) 50 | )", 51 | )); 52 | } 53 | 54 | // SELECT a, b, c, d FROM t1, t2 WHERE a = c; 55 | assert_parse_expr( 56 | "(hashjoin inner (list a) (list c) 57 | (scan t1 (list a b)) 58 
| (scan t2 (list c d)) 59 | )", 60 | ); 61 | } 62 | 63 | #[track_caller] 64 | fn assert_parse_expr(expr: &str) { 65 | expr.parse::<RecExpr>().expect("failed to parse expression"); 66 | } 67 | -------------------------------------------------------------------------------- /tests/6_plan_elimination.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::plan::rules; 2 | 3 | egg::test_fn! { 4 | limit_0, 5 | rules(), 6 | "(limit 0 0 7 | (scan t (list a b)))" => 8 | "(empty (scan t (list a b)))", 9 | } 10 | 11 | egg::test_fn! { 12 | limit_null, 13 | rules(), 14 | "(limit null 0 15 | (scan t (list a b)))" => 16 | "(scan t (list a b))", 17 | } 18 | 19 | egg::test_fn! { 20 | order_null, 21 | rules(), 22 | "(order (list) 23 | (scan t (list a b)))" => 24 | "(scan t (list a b))", 25 | } 26 | 27 | egg::test_fn! { 28 | filter_true, 29 | rules(), 30 | "(filter true 31 | (scan t (list a b)))" => 32 | "(scan t (list a b))", 33 | } 34 | 35 | egg::test_fn! { 36 | filter_false, 37 | rules(), 38 | "(filter false 39 | (scan t (list a b)))" => 40 | "(empty (scan t (list a b)))", 41 | } 42 | 43 | egg::test_fn! { 44 | inner_join_false, 45 | rules(), 46 | "(join inner false 47 | (scan t1 (list a b)) 48 | (scan t2 (list c d)))" => 49 | "(empty (join inner false 50 | (scan t1 (list a b)) 51 | (scan t2 (list c d)) 52 | ))", 53 | } 54 | 55 | egg::test_fn! { 56 | empty_propagation, 57 | rules(), 58 | "(proj (list b) 59 | (limit 1 1 60 | (order (list (asc (sum a))) 61 | (filter (= a 1) 62 | (join inner false 63 | (scan t1 (list a b)) 64 | (scan t2 (list c d)) 65 | )))))" => 66 | "(empty (list b))", 67 | } 68 | 69 | egg::test_fn! { 70 | limit_order_topn, 71 | rules(), 72 | "(limit 10 1 73 | (order (list (asc a)) 74 | (scan t (list a b))))" => 75 | "(topn 10 1 (list (asc a)) 76 | (scan t (list a b)))", 77 | } 78 | 79 | egg::test_fn! 
{ 80 | filter_merge, 81 | rules(), 82 | "(filter (= a 1) 83 | (filter (= b 2) 84 | (scan t (list a b))))" => 85 | "(filter (and (= a 1) (= b 2)) 86 | (scan t (list a b)))" 87 | } 88 | 89 | egg::test_fn! { 90 | proj_merge, 91 | rules(), 92 | "(proj (list a) 93 | (proj (list a b) 94 | (scan t (list a b))))" => 95 | "(proj (list a) 96 | (scan t (list a b)))" 97 | } 98 | -------------------------------------------------------------------------------- /tests/7_predicate_pushdown.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::{expr, plan, Rewrite}; 2 | 3 | fn rules() -> Vec<Rewrite> { 4 | let mut rules = vec![]; 5 | rules.extend(expr::rules()); 6 | rules.extend(plan::rules()); 7 | rules 8 | } 9 | 10 | egg::test_fn! { 11 | predicate_pushdown, 12 | rules(), 13 | // SELECT s.name, e.cid 14 | // FROM student AS s, enrolled AS e 15 | // WHERE s.sid = e.sid AND e.grade = 'A' AND s.name <> 'Alice' 16 | " 17 | (proj (list s.name e.cid) 18 | (filter (and (and (= s.sid e.sid) (= e.grade 'A')) (<> s.name 'Alice')) 19 | (join inner true 20 | (scan s (list s.sid s.name)) 21 | (scan e (list e.sid e.cid e.grade)) 22 | )))" => " 23 | (proj (list s.name e.cid) 24 | (join inner (= s.sid e.sid) 25 | (filter (<> s.name 'Alice') 26 | (scan s (list s.sid s.name))) 27 | (filter (= e.grade 'A') 28 | (scan e (list e.sid e.cid e.grade))) 29 | ))" 30 | } 31 | 32 | egg::test_fn! { 33 | join_reorder, 34 | rules(), 35 | // SELECT * FROM t1 36 | // INNER JOIN t2 ON t1.id = t2.id 37 | // INNER JOIN t3 ON t3.id = t2.id 38 | " 39 | (join inner (= t3.id t2.id) 40 | (join inner (= t1.id t2.id) 41 | (scan t1 (list t1.id t1.a)) 42 | (scan t2 (list t2.id t2.b)) 43 | ) 44 | (scan t3 (list t3.id t3.c)) 45 | )" => " 46 | (join inner (= t1.id t2.id) 47 | (scan t1 (list t1.id t1.a)) 48 | (join inner (= t2.id t3.id) 49 | (scan t2 (list t2.id t2.b)) 50 | (scan t3 (list t3.id t3.c)) 51 | ) 52 | )" 53 | } 54 | 55 | egg::test_fn! 
{ 56 | hash_join, 57 | rules(), 58 | // SELECT * FROM t1, t2 59 | // WHERE t1.id = t2.id AND t1.age > 2 60 | " 61 | (filter (and (= t1.id t2.id) (> t1.age 2)) 62 | (join inner true 63 | (scan t1 (list t1.id t1.age)) 64 | (scan t2 (list t2.id t2.name)) 65 | ))" => " 66 | (hashjoin inner (list t1.id) (list t2.id) 67 | (filter (> t1.age 2) 68 | (scan t1 (list t1.id t1.age)) 69 | ) 70 | (scan t2 (list t2.id t2.name)) 71 | )" 72 | } 73 | -------------------------------------------------------------------------------- /tests/8_projection_pushdown.rs: -------------------------------------------------------------------------------- 1 | use sql_optimizer_labs::plan::projection_pushdown_rules; 2 | 3 | egg::test_fn! { 4 | scan, 5 | projection_pushdown_rules(), 6 | // SELECT a FROM t(a, b, c, d) 7 | " 8 | (proj (list a) 9 | (scan t (list a b c d)) 10 | )" => " 11 | (scan t (list a))" 12 | } 13 | 14 | egg::test_fn! { 15 | filter, 16 | projection_pushdown_rules(), 17 | // SELECT a FROM t(a, b, c, d) WHERE b > 1 18 | " 19 | (proj (list a) 20 | (filter (> b 1) 21 | (scan t (list a b c d)) 22 | ))" => " 23 | (proj (list a) 24 | (filter (> b 1) 25 | (scan t (list a b)) 26 | ))" 27 | } 28 | 29 | egg::test_fn! { 30 | join, 31 | projection_pushdown_rules(), 32 | // SELECT b FROM t1(a, b, c, d) JOIN t2(x, y, z, w) ON a = x 33 | " 34 | (proj (list b) 35 | (join inner (= a x) 36 | (scan t1 (list a b c d)) 37 | (scan t2 (list x y z w)) 38 | ))" => " 39 | (proj (list b) 40 | (join inner (= a x) 41 | (scan t1 (list a b)) 42 | (scan t2 (list x)) 43 | ))" 44 | } 45 | 46 | egg::test_fn! { 47 | agg, 48 | projection_pushdown_rules(), 49 | // SELECT sum(a) FROM t(a, b, c, d) GROUP BY b 50 | " 51 | (proj (list (sum a)) 52 | (agg (list (sum a)) (list b) 53 | (scan t (list a b c d)) 54 | ))" => " 55 | (proj (list (sum a)) 56 | (agg (list (sum a)) (list b) 57 | (scan t (list a b)) 58 | ))" 59 | } 60 | 61 | egg::test_fn! 
{ 62 | having, 63 | projection_pushdown_rules(), 64 | // SELECT b FROM t(a, b, c, d) GROUP BY b HAVING sum(a) > 1 65 | " 66 | (proj (list b) 67 | (filter (> (sum a) 1) 68 | (agg (list (sum a)) (list b) 69 | (scan t (list a b c d)) 70 | )))" => " 71 | (proj (list b) 72 | (filter (> (sum a) 1) 73 | (proj (list a b) 74 | (agg (list (sum a)) (list b) 75 | (scan t (list a b)) 76 | ))))" 77 | } 78 | 79 | egg::test_fn! { 80 | projection_pushdown_2, 81 | projection_pushdown_rules(), 82 | // SELECT b 83 | // FROM t1(a, b, c, d) 84 | // JOIN t2(x, y, z, w) ON a = x 85 | // WHERE y > 1 86 | // GROUP BY b, c 87 | // HAVING sum(z) = 1 88 | // ORDER BY c; 89 | " 90 | (proj (list b) 91 | (order (list (asc c)) 92 | (filter (= (sum z) 1) 93 | (agg (list (sum z)) (list b c) 94 | (filter (> y 1) 95 | (join inner (= a x) 96 | (scan t1 (list a b c d)) 97 | (scan t2 (list x y z w)) 98 | ))))))" => " 99 | (proj (list b) 100 | (order (list (asc c)) 101 | (proj (list b c) 102 | (filter (= (sum z) 1) 103 | (proj (list b c z) 104 | (agg (list (sum z)) (list b c) 105 | (proj (list b c z) 106 | (filter (> y 1) 107 | (proj (list b c y z) 108 | (join inner (= a x) 109 | (scan t1 (list a b c)) 110 | (scan t2 (list x y z)) 111 | ))))))))))" 112 | } 113 | -------------------------------------------------------------------------------- /tests/9_agg_extraction.rs: -------------------------------------------------------------------------------- 1 | use egg::Language; 2 | use sql_optimizer_labs::{ 3 | agg::{plan_select, Error}, 4 | EGraph, RecExpr, 5 | }; 6 | 7 | #[test] 8 | fn no_agg() { 9 | // SELECT a FROM t; 10 | test(Case { 11 | select: "(list a)", 12 | from: "(scan t (list a))", 13 | where_: "", 14 | having: "", 15 | groupby: "", 16 | orderby: "", 17 | expected: Ok(" 18 | (proj (list a) 19 | (order list 20 | (filter true 21 | (filter true 22 | (scan t (list a)) 23 | ))))"), 24 | }); 25 | } 26 | 27 | #[test] 28 | fn agg() { 29 | // SELECT sum(a + b) + (a + 1) FROM t 30 | // WHERE b > 1 31 | // 
GROUP BY a + 1 32 | // HAVING count(a) > 1 33 | // ORDER BY max(b) 34 | test(Case { 35 | select: "(list (+ (sum (+ a b)) (+ a 1)))", 36 | from: "(scan t (list a b))", 37 | where_: "(> b 1)", 38 | having: "(> (count a) 1)", 39 | groupby: "(list (+ a 1))", 40 | orderby: "(list (asc (max b)))", 41 | expected: Ok(" 42 | (proj (list (+ (` (sum (+ a b))) (` (+ a 1)))) 43 | (order (list (asc (` (max b)))) 44 | (filter (> (` (count a)) 1) 45 | (agg (list (sum (+ a b)) (count a) (max b)) (list (+ a 1)) 46 | (filter (> b 1) 47 | (scan t (list a b)) 48 | )))))"), 49 | }); 50 | } 51 | 52 | #[test] 53 | fn error_agg_in_where() { 54 | // SELECT a FROM t WHERE sum(a) > 1 55 | test(Case { 56 | select: "(list a)", 57 | from: "(scan t (list a b))", 58 | where_: "(> (sum a) 1)", 59 | having: "", 60 | groupby: "", 61 | orderby: "", 62 | expected: Err(Error::AggInWhere), 63 | }); 64 | } 65 | 66 | #[test] 67 | fn error_agg_in_groupby() { 68 | // SELECT a FROM t GROUP BY sum(a) 69 | test(Case { 70 | select: "(list a)", 71 | from: "(scan t (list a b))", 72 | where_: "", 73 | having: "", 74 | groupby: "(list (sum a))", 75 | orderby: "", 76 | expected: Err(Error::AggInGroupBy), 77 | }); 78 | } 79 | 80 | #[test] 81 | fn error_nested_agg() { 82 | // SELECT count(sum(a)) FROM t 83 | test(Case { 84 | select: "(list (count (sum a)))", 85 | from: "(scan t (list a b))", 86 | where_: "", 87 | having: "", 88 | groupby: "", 89 | orderby: "", 90 | expected: Err(Error::NestedAgg("count".into())), 91 | }); 92 | } 93 | 94 | #[test] 95 | fn error_column_not_in_agg() { 96 | // SELECT b FROM t GROUP BY a 97 | test(Case { 98 | select: "(list b)", 99 | from: "(scan t (list a b))", 100 | where_: "", 101 | having: "", 102 | groupby: "(list a)", 103 | orderby: "", 104 | expected: Err(Error::ColumnNotInAgg("b".into())), 105 | }); 106 | } 107 | 108 | struct Case { 109 | select: &'static str, 110 | from: &'static str, 111 | where_: &'static str, 112 | having: &'static str, 113 | groupby: &'static str, 114 | 
orderby: &'static str, 115 | expected: Result<&'static str, Error>, 116 | } 117 | 118 | #[track_caller] 119 | fn test(mut case: Case) { 120 | if case.where_.is_empty() { 121 | case.where_ = "true"; 122 | } 123 | if case.having.is_empty() { 124 | case.having = "true"; 125 | } 126 | if case.groupby.is_empty() { 127 | case.groupby = "list"; 128 | } 129 | if case.orderby.is_empty() { 130 | case.orderby = "list"; 131 | } 132 | let mut egraph = EGraph::default(); 133 | let projection = egraph.add_expr(&case.select.parse().unwrap()); 134 | let from = egraph.add_expr(&case.from.parse().unwrap()); 135 | let where_ = egraph.add_expr(&case.where_.parse().unwrap()); 136 | let having = egraph.add_expr(&case.having.parse().unwrap()); 137 | let groupby = egraph.add_expr(&case.groupby.parse().unwrap()); 138 | let orderby = egraph.add_expr(&case.orderby.parse().unwrap()); 139 | match plan_select( 140 | &mut egraph, 141 | from, 142 | where_, 143 | having, 144 | groupby, 145 | orderby, 146 | projection, 147 | ) { 148 | Err(e) => assert_eq!(case.expected, Err(e)), 149 | Ok(id) => { 150 | let get_node = |id| egraph[id].nodes[0].clone(); 151 | let actual = get_node(id).build_recexpr(get_node).to_string(); 152 | let expected = case 153 | .expected 154 | .expect(&format!("expect error, but got: {actual:?}")) 155 | .parse::<RecExpr>() 156 | .unwrap() 157 | .to_string(); 158 | assert_eq!(actual, expected); 159 | } 160 | } 161 | } 162 | --------------------------------------------------------------------------------