├── src
├── function
│   ├── scalar
│   │   └── mod.rs
│   ├── mod.rs
│   └── aggregate
│   │   ├── count.rs
│   │   ├── mod.rs
│   │   └── avg.rs
├── sql
│   ├── mod.rs
│   ├── ast
│   │   └── mod.rs
│   └── parser
│   │   └── mod.rs
├── plan
│   ├── physical_planner
│   │   └── mod.rs
│   ├── logical_plan
│   │   ├── analyze.rs
│   │   ├── values.rs
│   │   ├── create_table.rs
│   │   ├── empty_relation.rs
│   │   ├── drop_table.rs
│   │   ├── filter.rs
│   │   ├── create_index.rs
│   │   ├── update.rs
│   │   ├── limit.rs
│   │   ├── delete.rs
│   │   ├── table_scan.rs
│   │   ├── aggregate.rs
│   │   ├── project.rs
│   │   ├── drop_index.rs
│   │   ├── insert.rs
│   │   ├── join.rs
│   │   ├── sort.rs
│   │   └── util.rs
│   ├── mod.rs
│   └── logical_planner
│   │   ├── mod.rs
│   │   ├── plan_create_index.rs
│   │   ├── plan_explain.rs
│   │   ├── plan_delete.rs
│   │   ├── plan_insert.rs
│   │   ├── plan_update.rs
│   │   ├── plan_drop.rs
│   │   ├── plan_create_table.rs
│   │   └── plan_query.rs
├── storage
│   ├── index
│   │   └── mod.rs
│   ├── io
│   │   └── mod.rs
│   ├── heap
│   │   └── mod.rs
│   ├── page
│   │   ├── mod.rs
│   │   └── freelist_page.rs
│   ├── codec
│   │   ├── mod.rs
│   │   ├── tuple.rs
│   │   ├── meta_page.rs
│   │   └── freelist_page.rs
│   └── mod.rs
├── tests
│   ├── mod.rs
│   ├── sql_example
│   │   ├── create_index.slt
│   │   ├── create_table.slt
│   │   ├── analyze.slt
│   │   ├── delete.slt
│   │   ├── update.slt
│   │   ├── drop.slt
│   │   ├── insert.slt
│   │   ├── show_explain.slt
│   │   └── transaction.slt
│   └── sql_test.rs
├── optimizer
│   ├── mod.rs
│   └── rule
│   │   ├── mod.rs
│   │   ├── push_down_filter.rs
│   │   ├── eliminate_limit.rs
│   │   ├── push_down_limit.rs
│   │   └── merge_limit.rs
├── utils
│   ├── mod.rs
│   ├── cache
│   │   ├── mod.rs
│   │   └── tiny_lfu.rs
│   ├── bitmap.rs
│   └── table_ref.rs
├── buffer
│   └── mod.rs
├── catalog
│   ├── mod.rs
│   ├── registry.rs
│   └── column.rs
├── recovery
│   ├── wal
│   │   ├── record.rs
│   │   ├── codec
│   │   │   ├── clr.rs
│   │   │   ├── txn.rs
│   │   │   ├── page.rs
│   │   │   └── checkpoint.rs
│   │   ├── io.rs
│   │   ├── writer.rs
│   │   └── buffer.rs
│   ├── mod.rs
│   ├── redo.rs
│   ├── analysis.rs
│   ├── wal_record.rs
│   └── resource_manager.rs
├── lib.rs
├── transaction
│   ├── mod.rs
│   └── lock_guard.rs
├── error.rs
├── expression
│   ├── util.rs
│   ├── literal.rs
│   ├── alias.rs
│   ├── cast.rs
│   ├── column.rs
│   └── aggregate.rs
└── execution
│   ├── physical_plan
│   ├── create_table.rs
│   ├── analyze.rs
│   ├── filter.rs
│   ├── project.rs
│   ├── empty.rs
│   ├── create_index.rs
│   ├── values.rs
│   ├── scan.rs
│   ├── limit.rs
│   ├── drop_table.rs
│   ├── drop_index.rs
│   ├── nested_loop_join.rs
│   └── seq_scan.rs
│   └── mod.rs
├── public
├── rust-db.png
└── terminal-preview.svg
├── typos.toml
├── docs
├── src
│   ├── assets
│   │   └── rust-db.png
│   ├── SUMMARY.md
│   ├── modules
│   │   ├── config.md
│   │   ├── bin.md
│   │   ├── tests.md
│   │   ├── background.md
│   │   ├── expression.md
│   │   ├── execution.md
│   │   ├── sql.md
│   │   ├── buffer.md
│   │   ├── plan.md
│   │   ├── optimizer.md
│   │   ├── index.md
│   │   ├── catalog.md
│   │   └── storage.md
│   ├── introduction.md
│   ├── buffer
│   │   └── page.md
│   ├── contributing.md
│   └── storage
│   │   └── disk_io.md
├── book.toml
└── mermaid-init.js
├── .dockerignore
├── .gitignore
├── fly.toml
├── .github
└── workflows
│   ├── ci_typos.yml
│   ├── mdbook.yml
│   └── ci.yml
├── Dockerfile
├── LICENSE
└── Cargo.toml

/src/function/scalar/mod.rs:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------

/src/sql/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod ast;
2 | pub mod parser;
--------------------------------------------------------------------------------
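Note: the `sql` module above is a thin façade over the `sqlparser` crate (`/src/sql/ast/mod.rs` later in this dump simply re-exports `sqlparser::ast` types). As a rough sketch of what entering the pipeline looks like — using only public `sqlparser` APIs; the `parse` helper name and the error handling here are hypothetical, not QuillSQL's actual entry point:

```rust
use sqlparser::ast::Statement;
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

// Hypothetical helper: turn a SQL string into the sqlparser AST that the
// planner layers shown below consume. Error type simplified to String.
fn parse(sql: &str) -> Result<Vec<Statement>, String> {
    Parser::parse_sql(&GenericDialect {}, sql).map_err(|e| e.to_string())
}

fn main() {
    let stmts = parse("SELECT id, v FROM t WHERE v > 10").unwrap();
    assert_eq!(stmts.len(), 1); // one Statement per top-level SQL statement
}
```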
/src/function/mod.rs:
--------------------------------------------------------------------------------
1 | mod aggregate;
2 | mod scalar;
3 | 
4 | pub use aggregate::*;
5 | 
--------------------------------------------------------------------------------

/public/rust-db.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feichai0017/QuillSQL/HEAD/public/rust-db.png
--------------------------------------------------------------------------------

/typos.toml:
--------------------------------------------------------------------------------
1 | [files]
2 | extend-exclude = ["docs/mermaid.min.js", "docs/mermaid-init.js"]
3 | 
--------------------------------------------------------------------------------

/docs/src/assets/rust-db.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feichai0017/QuillSQL/HEAD/docs/src/assets/rust-db.png
--------------------------------------------------------------------------------

/src/plan/physical_planner/mod.rs:
--------------------------------------------------------------------------------
1 | mod physical_planner;
2 | 
3 | pub use physical_planner::PhysicalPlanner;
4 | 
--------------------------------------------------------------------------------

/src/storage/index/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod btree_index;
2 | pub mod btree_iterator;
3 | pub mod index_recovery;
4 | pub mod wal_codec;
5 | 
--------------------------------------------------------------------------------

/src/tests/mod.rs:
--------------------------------------------------------------------------------
1 | #[cfg(test)]
2 | pub mod recovery_tests;
3 | pub mod sql_test;
4 | #[cfg(test)]
5 | pub mod transaction_tests;
6 | 
--------------------------------------------------------------------------------

/src/optimizer/mod.rs:
--------------------------------------------------------------------------------
1 | mod logical_optimizer;
2 | pub mod rule;
3 | 
4 | pub use logical_optimizer::{LogicalOptimizer, LogicalOptimizerRule};
5 | 
--------------------------------------------------------------------------------

/src/storage/io/mod.rs:
--------------------------------------------------------------------------------
1 | #[cfg(target_os = "linux")]
2 | pub mod io_uring;
3 | 
4 | #[cfg(not(target_os = "linux"))]
5 | pub mod block_io;
6 | 
--------------------------------------------------------------------------------

/src/tests/sql_example/create_index.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | create table t1 (a int, b int)
3 | 
4 | statement ok
5 | create index idx1 on t1 (a)
--------------------------------------------------------------------------------

/src/tests/sql_example/create_table.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | create table t(v1 int, v2 int, v3 int)
3 | 
4 | statement ok
5 | create table if not exists t
--------------------------------------------------------------------------------

/src/utils/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod bitmap;
2 | pub mod cache;
3 | pub mod scalar;
4 | pub mod table_ref;
5 | pub mod util;
6 | 
7 | pub mod ring_buffer;
8 | 
--------------------------------------------------------------------------------

/.dockerignore:
--------------------------------------------------------------------------------
1 | target
2 | .git
3 | .github
4 | **/*.rs.bk
5 | **/*.swp
6 | **/*.swo
7 | **/.DS_Store
8 | **/node_modules
9 | **/tmp
10 | **/target
11 | 
12 | 
13 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/analyze.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::table_ref::TableReference;
2 | 
3 | #[derive(Debug, Clone)]
4 | pub struct Analyze {
5 |     pub table: TableReference,
6 | }
7 | 
--------------------------------------------------------------------------------

/src/plan/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod logical_plan;
2 | mod logical_planner;
3 | mod physical_planner;
4 | 
5 | pub use logical_planner::{LogicalPlanner, PlannerContext};
6 | pub use physical_planner::PhysicalPlanner;
7 | 
--------------------------------------------------------------------------------

/src/storage/heap/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod heap_recovery;
2 | pub mod mvcc_heap;
3 | pub mod table_heap;
4 | pub mod wal_codec;
5 | 
6 | pub use mvcc_heap::MvccHeap;
7 | pub use table_heap::{TableHeap, TableIterator};
8 | 
--------------------------------------------------------------------------------

/src/storage/page/mod.rs:
--------------------------------------------------------------------------------
1 | mod btree_page;
2 | mod freelist_page;
3 | mod meta_page;
4 | mod table_page;
5 | 
6 | pub use btree_page::*;
7 | pub use freelist_page::*;
8 | pub use meta_page::*;
9 | pub use table_page::*;
10 | 
--------------------------------------------------------------------------------

/src/tests/sql_example/analyze.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | create table analyze_t (v int)
3 | 
4 | statement ok
5 | insert into analyze_t values (1), (2), (3)
6 | 
7 | statement ok
8 | analyze table analyze_t
9 | 
10 | query I
11 | select count(*) from analyze_t
12 | ----
13 | 3
14 | 
--------------------------------------------------------------------------------

/docs/book.toml:
--------------------------------------------------------------------------------
1 | [book]
2 | authors = ["The QuillSQL Team"]
3 | language = "en"
4 | src = "src"
5 | title = "QuillSQL Internals"
6 | 
7 | [preprocessor.mermaid]
8 | command = "mdbook-mermaid"
9 | 
10 | [output.html]
11 | site-url = "/QuillSQL/"
12 | 
13 | additional-js = ["mermaid.min.js", "mermaid-init.js"]
--------------------------------------------------------------------------------

/src/buffer/mod.rs:
--------------------------------------------------------------------------------
1 | mod buffer_manager;
2 | mod buffer_pool;
3 | mod page;
4 | 
5 | pub use buffer_manager::BufferManager;
6 | pub use buffer_pool::{BufferPool, FrameId, BUFFER_POOL_SIZE};
7 | pub use page::{
8 |     AtomicPageId, PageId, PageMeta, ReadPageGuard, WritePageGuard, INVALID_PAGE_ID, PAGE_SIZE,
9 | };
10 | 
--------------------------------------------------------------------------------

/src/plan/logical_planner/mod.rs:
--------------------------------------------------------------------------------
1 | mod bind_expr;
2 | mod logical_planner;
3 | mod plan_create_index;
4 | mod plan_create_table;
5 | mod plan_delete;
6 | mod plan_drop;
7 | mod plan_explain;
8 | mod plan_insert;
9 | mod plan_query;
10 | mod plan_set_expr;
11 | mod plan_update;
12 | 
13 | pub use logical_planner::{LogicalPlanner, PlannerContext};
14 | 
--------------------------------------------------------------------------------

/src/tests/sql_example/delete.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | create table d1 (
3 |     id int primary key,
4 |     value int
5 | )
6 | 
7 | statement ok
8 | insert into d1 values (1, 10), (2, 20), (3, 30)
9 | 
10 | statement ok
11 | delete from d1 where id = 2
12 | 
13 | query
14 | select * from d1 order by id
15 | ----
16 | 1 10
17 | 3 30
18 | 
19 | 
20 | 
--------------------------------------------------------------------------------

/src/catalog/mod.rs:
--------------------------------------------------------------------------------
1 | mod catalog;
2 | mod column;
3 | mod data_type;
4 | mod information;
5 | pub mod registry;
6 | mod schema;
7 | mod stats;
8 | 
9 | pub use catalog::*;
10 | pub use column::{Column, ColumnRef};
11 | pub use data_type::DataType;
12 | pub use information::*;
13 | pub use registry::*;
14 | pub use schema::*;
15 | pub use stats::*;
16 | 
--------------------------------------------------------------------------------

/src/optimizer/rule/mod.rs:
--------------------------------------------------------------------------------
1 | mod eliminate_limit;
2 | mod merge_limit;
3 | mod push_down_filter;
4 | mod push_down_limit;
5 | mod push_limit_to_scan;
6 | 
7 | pub use eliminate_limit::EliminateLimit;
8 | pub use merge_limit::MergeLimit;
9 | pub use push_down_filter::PushDownFilterToScan;
10 | pub use push_down_limit::PushDownLimit;
11 | pub use push_limit_to_scan::PushLimitIntoScan;
12 | 
--------------------------------------------------------------------------------

/src/recovery/wal/record.rs:
--------------------------------------------------------------------------------
1 | use bytes::Bytes;
2 | 
3 | use crate::recovery::wal::Lsn;
4 | 
5 | #[derive(Clone, Debug)]
6 | pub struct WalRecord {
7 |     pub start_lsn: Lsn,
8 |     pub end_lsn: Lsn,
9 |     pub payload: Bytes,
10 | }
11 | 
12 | impl WalRecord {
13 |     pub fn encoded_len(&self) -> u64 {
14 |         self.end_lsn.saturating_sub(self.start_lsn)
15 |     }
16 | }
17 | 
--------------------------------------------------------------------------------

/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub mod background;
2 | pub mod buffer;
3 | pub mod catalog;
4 | pub mod config;
5 | pub mod database;
6 | pub mod error;
7 | pub mod execution;
8 | pub mod expression;
9 | pub mod function;
10 | pub mod optimizer;
11 | pub mod plan;
12 | pub mod recovery;
13 | pub mod session;
14 | pub mod sql;
15 | pub mod storage;
16 | pub mod tests;
17 | pub mod transaction;
18 | pub mod utils;
19 | 
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated by Cargo
2 | # will have compiled files and executables
3 | debug/
4 | target/
5 | 
6 | 
7 | # These are backup files generated by rustfmt
8 | **/*.rs.bk
9 | 
10 | # MSVC Windows builds of rustc generate these, which store debugging information
11 | *.pdb
12 | 
13 | 
14 | # Added by cargo
15 | 
16 | /target
17 | .vscode
18 | .github
19 | .history
20 | AGENTS.md
21 | GEMINI.md
22 | /docs/book
23 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/values.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::expression::Expr;
3 | 
4 | #[derive(derive_new::new, Debug, Clone)]
5 | pub struct Values {
6 |     pub schema: SchemaRef,
7 |     pub values: Vec<Vec<Expr>>,
8 | }
9 | 
10 | impl std::fmt::Display for Values {
11 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
12 |         write!(f, "Values")
13 |     }
14 | }
15 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/create_table.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::Column;
2 | use crate::utils::table_ref::TableReference;
3 | 
4 | #[derive(Debug, Clone)]
5 | pub struct CreateTable {
6 |     pub name: TableReference,
7 |     pub columns: Vec<Column>,
8 |     pub if_not_exists: bool,
9 | }
10 | 
11 | impl std::fmt::Display for CreateTable {
12 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
13 |         write!(f, "CreateTable: {}", self.name)
14 |     }
15 | }
16 | 
--------------------------------------------------------------------------------

/src/tests/sql_example/update.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | create table t_update (
3 |     id int,
4 |     v int
5 | )
6 | 
7 | statement ok
8 | insert into t_update values (1, 5), (2, 10)
9 | 
10 | statement ok
11 | update t_update set v = v + 1
12 | 
13 | query
14 | select id, v from t_update order by id
15 | ----
16 | 1 6
17 | 2 11
18 | 
19 | statement ok
20 | update t_update set v = v + 2 where id = 1
21 | 
22 | query
23 | select id, v from t_update order by id
24 | ----
25 | 1 8
26 | 2 11
--------------------------------------------------------------------------------

/src/plan/logical_plan/empty_relation.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | 
3 | #[derive(Debug, Clone)]
4 | pub struct EmptyRelation {
5 |     /// Whether to produce a placeholder row
6 |     pub produce_one_row: bool,
7 |     /// The schema description of the output
8 |     pub schema: SchemaRef,
9 | }
10 | 
11 | impl std::fmt::Display for EmptyRelation {
12 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
13 |         write!(f, "EmptyRelation")
14 |     }
15 | }
16 | 
--------------------------------------------------------------------------------

/src/storage/codec/mod.rs:
--------------------------------------------------------------------------------
1 | mod btree_page;
2 | mod common;
3 | mod freelist_page;
4 | mod meta_page;
5 | mod scalar;
6 | mod table_page;
7 | mod tuple;
8 | 
9 | pub use btree_page::*;
10 | pub use common::CommonCodec;
11 | pub use freelist_page::{FreelistPageCodec, FreelistPageHeaderCodec};
12 | pub use meta_page::MetaPageCodec;
13 | pub use scalar::ScalarValueCodec;
14 | pub use table_page::*;
15 | pub use tuple::TupleCodec;
16 | 
17 | // data + consumed offset
18 | pub type DecodedData<T> = (T, usize);
19 | 
--------------------------------------------------------------------------------
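Note: `DecodedData` pairs a decoded value with the number of bytes consumed, which is what lets the page and tuple codecs above be chained over a single buffer. A self-contained sketch of that pattern — the decode functions here are illustrative stand-ins, not QuillSQL's actual `CommonCodec` API:

```rust
// data + consumed offset, mirroring the alias above.
type DecodedData<T> = (T, usize);

// Decode a little-endian u32, then a length-prefixed string, advancing an
// offset through the buffer the way chained codecs would.
fn decode_u32(buf: &[u8]) -> Result<DecodedData<u32>, String> {
    let raw: [u8; 4] = buf.get(..4).ok_or("buffer too short")?.try_into().unwrap();
    Ok((u32::from_le_bytes(raw), 4))
}

fn decode_string(buf: &[u8]) -> Result<DecodedData<String>, String> {
    let (len, header) = decode_u32(buf)?; // length prefix consumed first
    let end = header + len as usize;
    let bytes = buf.get(header..end).ok_or("buffer too short")?;
    let s = String::from_utf8(bytes.to_vec()).map_err(|e| e.to_string())?;
    Ok((s, end)) // total bytes consumed = prefix + payload
}

fn main() -> Result<(), String> {
    let mut buf = 2u32.to_le_bytes().to_vec(); // length prefix
    buf.extend_from_slice(b"hi");
    let (s, consumed) = decode_string(&buf)?;
    assert_eq!((s.as_str(), consumed), ("hi", 6));
    Ok(())
}
```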
/src/storage/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod codec;
2 | pub mod disk_manager;
3 | pub mod disk_scheduler;
4 | pub mod engine;
5 | pub mod heap;
6 | pub mod index;
7 | pub mod io;
8 | pub mod page;
9 | pub mod tuple;
10 | 
11 | pub use engine::{
12 |     DefaultStorageEngine, IndexHandle, IndexScanRequest, StorageEngine, TableBinding, TableHandle,
13 |     TupleStream,
14 | };
15 | 
16 | pub use heap::heap_recovery;
17 | pub use heap::mvcc_heap::{self, MvccHeap};
18 | pub use heap::table_heap;
19 | pub use heap::table_heap::{TableHeap, TableIterator};
20 | 
--------------------------------------------------------------------------------

/src/tests/sql_example/drop.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | create table drop_t (id int primary key)
3 | 
4 | statement ok
5 | create index drop_t_idx on drop_t(id)
6 | 
7 | statement ok
8 | drop index drop_t_idx
9 | 
10 | statement ok
11 | drop index if exists drop_t_idx
12 | 
13 | statement ok
14 | create index drop_t_idx on drop_t(id)
15 | 
16 | statement ok
17 | drop table drop_t
18 | 
19 | statement ok
20 | drop table if exists drop_t
21 | 
22 | statement error
23 | drop table drop_t
24 | 
25 | statement error
26 | drop index drop_t_idx
27 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/drop_table.rs:
--------------------------------------------------------------------------------
1 | use crate::utils::table_ref::TableReference;
2 | 
3 | #[derive(Debug, Clone)]
4 | pub struct DropTable {
5 |     pub name: TableReference,
6 |     pub if_exists: bool,
7 | }
8 | 
9 | impl std::fmt::Display for DropTable {
10 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
11 |         if self.if_exists {
12 |             write!(f, "DropTable IF EXISTS: {}", self.name)
13 |         } else {
14 |             write!(f, "DropTable: {}", self.name)
15 |         }
16 |     }
17 | }
18 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/filter.rs:
--------------------------------------------------------------------------------
1 | use crate::expression::Expr;
2 | use crate::plan::logical_plan::LogicalPlan;
3 | use std::sync::Arc;
4 | 
5 | #[derive(derive_new::new, Debug, Clone)]
6 | pub struct Filter {
7 |     /// The predicate expression, which must have Boolean type.
8 |     pub predicate: Expr,
9 |     /// The incoming logical plan
10 |     pub input: Arc<LogicalPlan>,
11 | }
12 | 
13 | impl std::fmt::Display for Filter {
14 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
15 |         write!(f, "Filter: {}", self.predicate)
16 |     }
17 | }
18 | 
--------------------------------------------------------------------------------

/src/transaction/mod.rs:
--------------------------------------------------------------------------------
1 | mod lock_guard;
2 | mod lock_manager;
3 | mod mvcc;
4 | mod transaction;
5 | mod transaction_manager;
6 | mod txn_context;
7 | 
8 | pub use lock_guard::{RowLockGuard, TxnReadGuard};
9 | pub use lock_manager::{LockDebugSnapshot, LockManager, LockMode};
10 | pub use mvcc::{TransactionSnapshot, TransactionStatus};
11 | pub use transaction::{
12 |     CommandId, IsolationLevel, Transaction, TransactionId, TransactionState, INVALID_COMMAND_ID,
13 | };
14 | pub use transaction_manager::{TransactionManager, TxnDebugSnapshot};
15 | pub use txn_context::TxnContext;
16 | 
--------------------------------------------------------------------------------

/fly.toml:
--------------------------------------------------------------------------------
1 | app = "quillsql"
2 | primary_region = "sin"
3 | 
4 | [build]
5 | dockerfile = "Dockerfile"
6 | 
7 | [env]
8 | PORT = "8080"
9 | RUST_LOG = "info"
10 | # QUILL_DB_FILE = "/data/quill.db"
11 | QUILL_DISABLE_DIRECT_IO = "1"
12 | 
13 | [http_service]
14 |   internal_port = 8080
15 |   force_https = true
16 |   auto_start_machines = true
17 |   auto_stop_machines = "off"
18 |   min_machines_running = 1
19 | 
20 | [[vm]]
21 |   cpu_kind = "shared"
22 |   cpus = 1
23 |   memory = "512mb"
24 | 
25 | # [[mounts]]
26 | # source = "data"
27 | # destination = "/data"
--------------------------------------------------------------------------------

/src/plan/logical_plan/create_index.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::plan::logical_plan::OrderByExpr;
3 | use crate::utils::table_ref::TableReference;
4 | 
5 | #[derive(derive_new::new, Debug, Clone)]
6 | pub struct CreateIndex {
7 |     pub index_name: String,
8 |     pub table: TableReference,
9 |     pub table_schema: SchemaRef,
10 |     pub columns: Vec<OrderByExpr>,
11 | }
12 | 
13 | impl std::fmt::Display for CreateIndex {
14 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
15 |         write!(f, "CreateIndex: {}", self.index_name)
16 |     }
17 | }
18 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/update.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::expression::Expr;
3 | use crate::utils::table_ref::TableReference;
4 | use std::collections::HashMap;
5 | 
6 | #[derive(derive_new::new, Debug, Clone)]
7 | pub struct Update {
8 |     pub table: TableReference,
9 |     pub table_schema: SchemaRef,
10 |     pub assignments: HashMap<String, Expr>,
11 |     pub selection: Option<Expr>,
12 | }
13 | 
14 | impl std::fmt::Display for Update {
15 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
16 |         write!(f, "Update: {}", self.table)
17 |     }
18 | }
19 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/limit.rs:
--------------------------------------------------------------------------------
1 | use crate::plan::logical_plan::LogicalPlan;
2 | use std::sync::Arc;
3 | 
4 | #[derive(derive_new::new, Debug, Clone)]
5 | pub struct Limit {
6 |     pub limit: Option<usize>,
7 |     pub offset: usize,
8 |     pub input: Arc<LogicalPlan>,
9 | }
10 | 
11 | impl std::fmt::Display for Limit {
12 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
13 |         write!(
14 |             f,
15 |             "Limit: {}, offset: {}",
16 |             self.limit.map_or("None".to_string(), |v| v.to_string()),
17 |             self.offset
18 |         )
19 |     }
20 | }
21 | 
--------------------------------------------------------------------------------

/.github/workflows/ci_typos.yml:
--------------------------------------------------------------------------------
1 | name: Typos Check
2 | 
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |   pull_request:
8 |     branches:
9 |       - main
10 | 
11 | concurrency:
12 |   group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}
13 |   cancel-in-progress: true
14 | 
15 | env:
16 |   RUST_BACKTRACE: 1
17 | 
18 | jobs:
19 |   typos-check:
20 |     name: typos check
21 |     runs-on: ubuntu-latest
22 |     timeout-minutes: 10
23 |     env:
24 |       FORCE_COLOR: 1
25 |     steps:
26 |       - uses: actions/checkout@v5
27 |       - name: Check typos
28 |         uses: crate-ci/typos@v1.37.2
29 | 
--------------------------------------------------------------------------------

/src/recovery/mod.rs:
--------------------------------------------------------------------------------
1 | pub mod analysis;
2 | pub mod control_file;
3 | pub mod recovery_manager;
4 | pub mod redo;
5 | pub mod resource_manager;
6 | pub mod undo;
7 | pub mod wal;
8 | pub mod wal_record;
9 | 
10 | pub use control_file::{ControlFileManager, ControlFileSnapshot, WalInitState};
11 | pub use recovery_manager::RecoveryManager;
12 | pub use wal::{Lsn, WalAppendContext, WalAppendResult, WalManager, WalReader, WalWriterHandle};
13 | pub use wal_record::{
14 |     decode_frame, CheckpointPayload, PageWritePayload, ResourceManagerId, TransactionPayload,
15 |     TransactionRecordKind, WalFrame, WalRecordPayload,
16 | };
17 | 
--------------------------------------------------------------------------------

/src/utils/cache/mod.rs:
--------------------------------------------------------------------------------
1 | use crate::buffer::FrameId;
2 | use crate::error::QuillSQLResult;
3 | 
4 | pub mod clock_lru;
5 | pub mod lru_k;
6 | pub mod tiny_lfu;
7 | pub mod window_lfu;
8 | 
9 | pub trait Replacer {
10 |     fn new(capacity: usize) -> Self
11 |     where
12 |         Self: Sized;
13 | 
14 |     fn record_access(&mut self, frame_id: FrameId) -> QuillSQLResult<()>;
15 | 
16 |     fn evict(&mut self) -> Option<FrameId>;
17 | 
18 |     fn set_evictable(&mut self, frame_id: FrameId, set_evictable: bool) -> QuillSQLResult<()>;
19 | 
20 |     fn remove(&mut self, frame_id: FrameId);
21 | 
22 |     fn size(&self) -> usize;
23 | }
24 | 
--------------------------------------------------------------------------------
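Note: the `Replacer` trait above is the eviction-policy seam for the buffer pool — `clock_lru`, `lru_k`, `tiny_lfu`, and `window_lfu` all plug in behind it. A minimal self-contained sketch of the same contract with a deliberately naive FIFO policy (`FrameId` and the result alias are local stand-ins for the crate types, and this does not claim to match any of the real implementations):

```rust
use std::collections::{HashSet, VecDeque};

// Stand-ins for crate types so the sketch compiles on its own.
type FrameId = usize;
type QuillSQLResult<T> = Result<T, String>;

// Naive FIFO replacer: evict the least recently *inserted* evictable frame.
struct FifoReplacer {
    order: VecDeque<FrameId>,
    evictable: HashSet<FrameId>,
}

impl FifoReplacer {
    fn new(_capacity: usize) -> Self {
        Self { order: VecDeque::new(), evictable: HashSet::new() }
    }

    fn record_access(&mut self, frame_id: FrameId) -> QuillSQLResult<()> {
        if !self.order.contains(&frame_id) {
            self.order.push_back(frame_id); // FIFO ignores re-accesses
        }
        Ok(())
    }

    fn evict(&mut self) -> Option<FrameId> {
        // Oldest frame that is currently marked evictable.
        let pos = self.order.iter().position(|f| self.evictable.contains(f))?;
        let frame = self.order.remove(pos)?;
        self.evictable.remove(&frame);
        Some(frame)
    }

    fn set_evictable(&mut self, frame_id: FrameId, evictable: bool) -> QuillSQLResult<()> {
        if evictable {
            self.evictable.insert(frame_id);
        } else {
            self.evictable.remove(&frame_id); // pinned frames cannot be evicted
        }
        Ok(())
    }

    fn size(&self) -> usize {
        self.evictable.len()
    }
}
```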
/src/plan/logical_plan/delete.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::expression::Expr;
3 | use crate::utils::table_ref::TableReference;
4 | 
5 | #[derive(derive_new::new, Debug, Clone)]
6 | pub struct Delete {
7 |     /// Target table reference
8 |     pub table: TableReference,
9 |     /// Cached schema for the table heap
10 |     pub table_schema: SchemaRef,
11 |     /// Optional predicate bound during planning
12 |     pub selection: Option<Expr>,
13 | }
14 | 
15 | impl std::fmt::Display for Delete {
16 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
17 |         write!(f, "Delete: {}", self.table)
18 |     }
19 | }
20 | 
--------------------------------------------------------------------------------

/src/sql/ast/mod.rs:
--------------------------------------------------------------------------------
1 | // This acts as an abstraction layer, allowing easier parser replacement later.
2 | pub use sqlparser::ast::BinaryOperator;
3 | pub use sqlparser::ast::ColumnDef;
4 | pub use sqlparser::ast::DataType;
5 | pub use sqlparser::ast::Expr;
6 | pub use sqlparser::ast::Function;
7 | pub use sqlparser::ast::FunctionArg;
8 | pub use sqlparser::ast::FunctionArgExpr;
9 | pub use sqlparser::ast::ObjectName;
10 | pub use sqlparser::ast::OrderByExpr;
11 | pub use sqlparser::ast::Statement;
12 | pub use sqlparser::ast::TableConstraint;
13 | pub use sqlparser::ast::TransactionMode;
14 | pub use sqlparser::ast::UnaryOperator;
15 | pub use sqlparser::ast::Value;
16 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/table_scan.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::expression::Expr;
3 | use crate::utils::table_ref::TableReference;
4 | 
5 | #[derive(derive_new::new, Debug, Clone)]
6 | pub struct TableScan {
7 |     pub table_ref: TableReference,
8 |     pub table_schema: SchemaRef,
9 |     pub filters: Vec<Expr>,
10 |     pub limit: Option<usize>,
11 |     /// Row-count estimate attached by the planner (from ANALYZE).
12 |     pub estimated_row_count: Option<usize>,
13 | }
14 | 
15 | impl std::fmt::Display for TableScan {
16 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
17 |         write!(f, "TableScan: {}", self.table_ref)
18 |     }
19 | }
20 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/aggregate.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::expression::Expr;
3 | use crate::plan::logical_plan::LogicalPlan;
4 | use std::sync::Arc;
5 | 
6 | #[derive(Debug, Clone)]
7 | pub struct Aggregate {
8 |     /// The incoming logical plan
9 |     pub input: Arc<LogicalPlan>,
10 |     /// Grouping expressions
11 |     pub group_exprs: Vec<Expr>,
12 |     /// Aggregate expressions
13 |     pub aggr_exprs: Vec<Expr>,
14 |     /// The schema description of the aggregate output
15 |     pub schema: SchemaRef,
16 | }
17 | 
18 | impl std::fmt::Display for Aggregate {
19 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
20 |         write!(f, "Aggregate")
21 |     }
22 | }
23 | 
--------------------------------------------------------------------------------

/src/function/aggregate/count.rs:
--------------------------------------------------------------------------------
1 | use crate::error::QuillSQLResult;
2 | use crate::function::aggregate::Accumulator;
3 | use crate::utils::scalar::ScalarValue;
4 | 
5 | #[derive(Debug, Clone)]
6 | pub struct CountAccumulator {
7 |     count: i64,
8 | }
9 | 
10 | impl CountAccumulator {
11 |     pub fn new() -> Self {
12 |         Self { count: 0 }
13 |     }
14 | }
15 | 
16 | impl Accumulator for CountAccumulator {
17 |     fn update_value(&mut self, value: &ScalarValue) -> QuillSQLResult<()> {
18 |         if !value.is_null() {
19 |             self.count += 1;
20 |         }
21 |         Ok(())
22 |     }
23 | 
24 |     fn evaluate(&self) -> QuillSQLResult<ScalarValue> {
25 |         Ok(self.count.into())
26 |     }
27 | }
28 | 
--------------------------------------------------------------------------------
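Note: `CountAccumulator` shows the `Accumulator` contract (the trait itself appears in `/src/function/aggregate/mod.rs` later in this dump): stream values in via `update_value`, then read the final aggregate once via `evaluate`, with SQL's rule that NULLs are ignored. A standalone SUM accumulator in the same shape — crate types (`ScalarValue`, `QuillSQLResult`) replaced by plain Rust stand-ins, so this is an illustration of the pattern, not the crate's `AvgAccumulator`:

```rust
// Stand-in for ScalarValue: None plays the role of SQL NULL.
type Value = Option<i64>;

trait Accumulator {
    fn update_value(&mut self, value: &Value) -> Result<(), String>;
    fn evaluate(&self) -> Result<Value, String>;
}

#[derive(Default)]
struct SumAccumulator {
    sum: i64,
    seen_non_null: bool,
}

impl Accumulator for SumAccumulator {
    fn update_value(&mut self, value: &Value) -> Result<(), String> {
        if let Some(v) = value {
            self.sum += v; // NULL inputs are skipped, like CountAccumulator
            self.seen_non_null = true;
        }
        Ok(())
    }

    fn evaluate(&self) -> Result<Value, String> {
        // SQL semantics: SUM over only NULLs (or no rows) is NULL.
        Ok(self.seen_non_null.then_some(self.sum))
    }
}
```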
/src/plan/logical_plan/project.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::expression::Expr;
3 | use crate::plan::logical_plan::LogicalPlan;
4 | use std::sync::Arc;
5 | 
6 | #[derive(derive_new::new, Debug, Clone)]
7 | pub struct Project {
8 |     pub exprs: Vec<Expr>,
9 |     pub input: Arc<LogicalPlan>,
10 |     pub schema: SchemaRef,
11 | }
12 | 
13 | impl std::fmt::Display for Project {
14 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
15 |         write!(
16 |             f,
17 |             "Project: {}",
18 |             self.exprs
19 |                 .iter()
20 |                 .map(|e| format!("{e}"))
21 |                 .collect::<Vec<_>>()
22 |                 .join(", ")
23 |         )
24 |     }
25 | }
26 | 
--------------------------------------------------------------------------------

/.github/workflows/mdbook.yml:
--------------------------------------------------------------------------------
1 | name: Deploy mdBook
2 | 
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 | 
8 | jobs:
9 |   deploy-book:
10 |     runs-on: ubuntu-latest
11 |     permissions:
12 |       contents: write
13 |     steps:
14 |       - name: Checkout
15 |         uses: actions/checkout@v4
16 | 
17 |       - name: Install mdbook and mdbook-mermaid
18 |         run: |
19 |           cargo install mdbook --no-default-features
20 |           cargo install mdbook-mermaid
21 | 
22 |       - name: Build mdbook
23 |         run: mdbook build docs
24 | 
25 |       - name: Deploy to GitHub Pages
26 |         uses: peaceiris/actions-gh-pages@v3
27 |         with:
28 |           github_token: ${{ secrets.GITHUB_TOKEN }}
29 |           publish_dir: ./docs/book
30 | 
--------------------------------------------------------------------------------

/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM rust:1.82 as builder
2 | 
3 | # Install and use the nightly toolchain to support Edition 2024 dependencies
4 | RUN rustup toolchain install nightly
5 | RUN rustup default nightly
6 | WORKDIR /app
7 | 
8 | # Pre-cache deps
9 | COPY Cargo.toml Cargo.lock ./
10 | RUN mkdir -p src/bin && echo "fn main(){}" > src/bin/dummy.rs && cargo build --release || true
11 | 
12 | # Build
13 | COPY . .
14 | RUN cargo build --release --bin server
15 | 
16 | FROM gcr.io/distroless/cc-debian12:nonroot
17 | USER nonroot
18 | WORKDIR /app
19 | COPY --from=builder /app/target/release/server /usr/local/bin/server
20 | COPY --from=builder /app/public /app/public
21 | COPY --from=builder /app/docs /app/docs
22 | ENV QUILL_HTTP_ADDR=0.0.0.0:8080
23 | EXPOSE 8080
24 | CMD ["/usr/local/bin/server"]
25 | 
26 | 
27 | 
--------------------------------------------------------------------------------

/src/tests/sql_example/insert.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | create table t1 (
3 |     a tinyint,
4 |     b smallint,
5 |     c integer,
6 |     d bigint,
7 |     e tinyint unsigned,
8 |     f smallint unsigned,
9 |     g integer unsigned,
10 |     h bigint unsigned,
11 |     i float,
12 |     j varchar
13 | )
14 | 
15 | statement ok
16 | insert into t1 values
17 |     (1, 2, 3, 4, 5, 6, 7, 8, 1.1, 'a'),
18 |     (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
19 | 
20 | query
21 | select * from t1
22 | ----
23 | 1 2 3 4 5 6 7 8 1.1 a
24 | NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
25 | 
26 | 
27 | statement ok
28 | create table t2 (
29 |     a tinyint,
30 |     b integer default 1,
31 | )
32 | 
33 | statement ok
34 | insert into t2(a) values (1)
35 | 
36 | query
37 | select * from t2
38 | ----
39 | 1 1
--------------------------------------------------------------------------------

/src/plan/logical_plan/drop_index.rs:
--------------------------------------------------------------------------------
1 | #[derive(Debug, Clone)]
2 | pub struct DropIndex {
3 |     pub name: String,
4 |     pub schema: Option<String>,
5 |     pub catalog: Option<String>,
6 |     pub if_exists: bool,
7 | }
8 | 
9 | impl std::fmt::Display for DropIndex {
10 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
11 |         let qualified = match (&self.catalog, &self.schema) {
12 |             (Some(catalog), Some(schema)) => format!("{catalog}.{schema}.{}", self.name),
13 |             (None, Some(schema)) => format!("{schema}.{}", self.name),
14 |             _ => self.name.clone(),
15 |         };
16 |         if self.if_exists {
17 |             write!(f, "DropIndex IF EXISTS: {qualified}")
18 |         } else {
19 |             write!(f, "DropIndex: {qualified}")
20 |         }
21 |     }
22 | }
23 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/insert.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::plan::logical_plan::LogicalPlan;
3 | use crate::utils::table_ref::TableReference;
4 | use std::sync::Arc;
5 | 
6 | #[derive(derive_new::new, Debug, Clone)]
7 | pub struct Insert {
8 |     pub table: TableReference,
9 |     pub table_schema: SchemaRef,
10 |     pub projected_schema: SchemaRef,
11 |     pub input: Arc<LogicalPlan>,
12 | }
13 | 
14 | impl std::fmt::Display for Insert {
15 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
16 |         write!(
17 |             f,
18 |             "Insert: {} ({})",
19 |             self.table,
20 |             self.projected_schema
21 |                 .columns
22 |                 .iter()
23 |                 .map(|c| c.name.clone())
24 |                 .collect::<Vec<_>>()
25 |                 .join(", ")
26 |         )
27 |     }
28 | }
29 | 
--------------------------------------------------------------------------------

/src/error.rs:
--------------------------------------------------------------------------------
1 | use thiserror::Error;
2 | 
3 | pub type QuillSQLResult<T> = Result<T, QuillSQLError>;
4 | 
5 | #[derive(Debug, Error)]
6 | pub enum QuillSQLError {
7 |     #[error("Not support: {0}")]
8 |     NotSupport(String),
9 | 
10 |     #[error("Internal error: {0}")]
11 |     Internal(String),
12 | 
13 |     #[error("IO error: {0}")]
14 |     Io(#[from] std::io::Error),
15 | 
16 |     #[error("Parser error: {0}")]
17 |     Parser(#[from] sqlparser::parser::ParserError),
18 | 
19 |     #[error("Bincode error: {0}")]
20 |     Bincode(#[from] bincode::Error),
21 | 
22 |     #[error("Plan error: {0}")]
23 |     Plan(String),
24 | 
25 |     #[error("Execution error: {0}")]
26 |     Execution(String),
27 | 
28 |     #[error("Storage error: {0}")]
29 |     Storage(String),
30 | 
31 |     #[error("Concurrent error: {0}")]
32 |     Concurrent(String),
33 | 
34 |     #[error("Unwind")]
35 |     Unwind,
36 | }
37 | 
--------------------------------------------------------------------------------
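Note: with `thiserror`'s `#[from]` conversions in place, lower layers can bubble IO, parser, and bincode failures into `QuillSQLError` with a bare `?`. A self-contained sketch of the same pattern — the enum, function, and path here are local examples, not crate code:

```rust
use thiserror::Error;

type DemoResult<T> = Result<T, DemoError>;

#[derive(Debug, Error)]
enum DemoError {
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    #[error("Storage error: {0}")]
    Storage(String),
}

// `?` on an io::Result auto-converts through the #[from] impl above — the
// same mechanism that produces QuillSQLError::Io from std::io::Error.
fn read_page(path: &str) -> DemoResult<Vec<u8>> {
    let bytes = std::fs::read(path)?;
    if bytes.is_empty() {
        return Err(DemoError::Storage(format!("{path}: empty page file")));
    }
    Ok(bytes)
}
```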
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: ci
2 | 
3 | on:
4 |   push:
5 |     branches: ["main"]
6 |   pull_request:
7 | 
8 | jobs:
9 |   rust:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - uses: actions/checkout@v4
13 | 
14 |       - name: Install nightly toolchain
15 |         uses: dtolnay/rust-toolchain@nightly
16 |         with:
17 |           components: rustfmt, clippy
18 | 
19 |       - uses: Swatinem/rust-cache@v2
20 | 
21 |       - name: fmt
22 |         run: cargo fmt --all -- --check
23 | 
24 |       - name: clippy
25 |         run: cargo clippy --all-targets
26 | 
27 |       - name: test
28 |         run: cargo test -q
29 | 
30 |   docker-build:
31 |     runs-on: ubuntu-latest
32 |     needs: rust
33 |     steps:
34 |       - uses: actions/checkout@v4
35 | 
36 |       # Build the image only; tests are not run inside the Dockerfile
37 |       - name: Build Docker image (no push)
38 |         run: docker build --pull -t quillsql-ci .
39 | 
--------------------------------------------------------------------------------

/src/catalog/registry.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use dashmap::DashMap;
4 | 
5 | use crate::storage::table_heap::TableHeap;
6 | use crate::utils::table_ref::TableReference;
7 | 
8 | /// Registry of table heaps that may require background maintenance.
9 | #[derive(Debug, Default)]
10 | pub struct TableRegistry {
11 |     inner: DashMap<TableReference, Arc<TableHeap>>,
12 | }
13 | 
14 | impl TableRegistry {
15 |     pub fn new() -> Self {
16 |         Self {
17 |             inner: DashMap::new(),
18 |         }
19 |     }
20 | 
21 |     pub fn register(&self, table: TableReference, heap: Arc<TableHeap>) {
22 |         self.inner.insert(table, heap);
23 |     }
24 | 
25 |     pub fn unregister(&self, table: &TableReference) {
26 |         self.inner.remove(table);
27 |     }
28 | 
29 |     pub fn iter_tables(&self) -> impl Iterator<Item = (TableReference, Arc<TableHeap>)> + '_ {
30 |         self.inner
31 |             .iter()
32 |             .map(|entry| (entry.key().clone(), entry.value().clone()))
33 |     }
34 | }
35 | 
--------------------------------------------------------------------------------

/src/tests/sql_example/show_explain.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | CREATE TABLE t(id INT, v INT)
3 | 
4 | statement ok
5 | INSERT INTO t(id, v) VALUES (1, 10), (2, 20), (3, 30)
6 | 
7 | # SHOW DATABASES should at least include 'public'
8 | query T
9 | SELECT schema FROM information_schema.schemas WHERE schema = 'public'
10 | ----
11 | public
12 | 
13 | # SHOW TABLES should include user table 't' (using information_schema for stability)
14 | query T
15 | SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' and table_name = 't'
16 | ----
17 | t
18 | 
19 | # EXPLAIN returns lines of plan text
20 | query T
21 | EXPLAIN SELECT id FROM t WHERE v > 10 ORDER BY id LIMIT 2 OFFSET 1
22 | ----
23 | Limit: 2, offset: 1
24 |   Sort: id ASC NULLS LAST
25 |     Project: id
26 |       Filter: (v Gt 10)
27 |         TableScan: t
28 | 
29 | query T
30 | EXPLAIN SELECT id, COUNT(*) FROM t GROUP BY id ORDER BY id
31 | ----
32 | Sort: id ASC NULLS LAST
33 |   Project: id, Count
34 |     Aggregate
35 |       TableScan: t
36 | 
37 | 
--------------------------------------------------------------------------------

/src/catalog/column.rs:
--------------------------------------------------------------------------------
1 | use derive_with::With;
2 | use std::sync::Arc;
3 | 
4 | use crate::catalog::DataType;
5 | use crate::utils::scalar::ScalarValue;
6 | use crate::utils::table_ref::TableReference;
7 | 
8 | pub type ColumnRef = Arc<Column>;
9 | 
10 | #[derive(Debug, Clone, With)]
11 | pub struct Column {
12 |     pub relation: Option<TableReference>,
13 |     pub name: String,
14 |     pub data_type: DataType,
15 |     pub nullable: bool,
16 |     pub default: ScalarValue,
17 | }
18 | 
19 | impl PartialEq for Column {
20 |     fn eq(&self, other: &Self) -> bool {
21 |         self.name == other.name && self.data_type == other.data_type
22 |     }
23 | }
24 | 
25 | impl Eq for Column {}
26 | 
27 | impl Column {
28 |     pub fn new(name: impl Into<String>, data_type: DataType, nullable: bool) -> Self {
29 |         Self {
30 |             relation: None,
31 |             name: name.into(),
32 |             data_type,
33 |             nullable,
34 |             default: ScalarValue::new_empty(data_type),
35 |         }
36 |     }
37 | }
38 | 
--------------------------------------------------------------------------------

/src/expression/util.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::error::QuillSQLResult;
3 | use crate::expression::{Alias, Cast, ColumnExpr, Expr};
4 | 
5 | /// Convert an expression into Column expression
6 | pub fn columnize_expr(e: &Expr, input_schema: &SchemaRef) -> QuillSQLResult<Expr> {
7 |     match e {
8 |         Expr::Column(_) => Ok(e.clone()),
9 |         Expr::Alias(Alias { expr, name }) => Ok(Expr::Alias(Alias {
10 |             expr: Box::new(columnize_expr(expr, input_schema)?),
11 |             name: name.clone(),
12 |         })),
13 |         Expr::Cast(Cast { expr, data_type }) => Ok(Expr::Cast(Cast {
14 |             expr: Box::new(columnize_expr(expr, input_schema)?),
15 |             data_type: *data_type,
16 |         })),
17 |         _ => {
18 |             let name = e.to_string();
19 |             let idx = input_schema.index_of(None, name.as_str())?;
20 |             let col = input_schema.column_with_index(idx)?;
21 |             Ok(Expr::Column(ColumnExpr {
22 |                 relation: col.relation.clone(),
23 |                 name,
24 |             }))
25 |         }
26 |     }
27 | }
28 | 
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 eric_song
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------

/src/tests/sql_example/transaction.slt:
--------------------------------------------------------------------------------
1 | statement ok
2 | create table accounts(id integer primary key, balance integer)
3 | 
4 | statement ok
5 | insert into accounts values (1, 100), (2, 200)
6 | 
7 | statement ok
8 | update accounts set balance = 150 where id = 1
9 | 
10 | query
11 | select * from accounts order by id
12 | ----
13 | 1 150
14 | 2 200
15 | 
16 | statement ok
17 | begin
18 | 
19 | statement ok
20 | update accounts set balance = 175 where id = 2
21 | 
22 | statement ok
23 | rollback
24 | 
25 | query
26 | select * from accounts order by id
27 | ----
28 | 1 150
29 | 2 200
30 | 
31 | statement ok
32 | set transaction isolation level serializable
33 | 
34 | statement ok
35 | begin
36 | 
37 | statement ok
38 | update accounts set balance = 160 where id = 1
39 | 
40 | statement ok
41 | commit
42 | 
43 | query
44 | select * from accounts order by id
45 | ----
46 | 1 160
47 | 2 200
48 | 
49 | statement ok
50 | set session transaction isolation level read committed
51 | 
52 | statement ok
53 | begin
54 | 
55 | statement ok
56 | update accounts set balance = 195 where id = 2
57 | 
58 | statement ok
59 | commit
60 | 
61 | query
62 | select * from accounts order by id
63 | ----
64 | 1 160
65 | 2 195
66 | 
--------------------------------------------------------------------------------

/src/expression/literal.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::Schema;
2 | use crate::catalog::{Column, DataType};
3 | use crate::error::QuillSQLResult;
4 | use crate::expression::ExprTrait;
5 | use crate::storage::tuple::Tuple;
6 | use crate::utils::scalar::ScalarValue;
7 | 
8 | #[derive(Debug, Clone, PartialEq, Eq)]
9 | pub struct Literal {
10 |     pub value: ScalarValue,
11 | }
12 | 
13 | impl ExprTrait for Literal {
14 |     fn data_type(&self, _input_schema: &Schema) -> QuillSQLResult<DataType> {
15 |         Ok(self.value.data_type())
16 |     }
17 | 
18 |     fn nullable(&self, _input_schema: &Schema) -> QuillSQLResult<bool> {
19 |         Ok(self.value.is_null())
20 |     }
21 | 
22 |     fn evaluate(&self, _tuple: &Tuple) -> QuillSQLResult<ScalarValue> {
23 |         Ok(self.value.clone())
24 |     }
25 | 
26 |     fn to_column(&self, input_schema: &Schema) -> QuillSQLResult<Column> {
27 |         Ok(Column::new(
28 |             format!("{}", self.value),
29 |             self.data_type(input_schema)?,
30 |             self.nullable(input_schema)?,
31 |         ))
32 |     }
33 | }
34 | 
35 | impl std::fmt::Display for Literal {
36 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
37 |         write!(f, "{}", self.value)
38 |     }
39 | }
40 | 
--------------------------------------------------------------------------------

/src/execution/physical_plan/create_table.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::utils::table_ref::TableReference;
3 | use crate::{
4 |     catalog::Schema,
5 |     error::QuillSQLResult,
6 |     execution::{ExecutionContext, VolcanoExecutor},
7 |     storage::tuple::Tuple,
8 | };
9 | use std::sync::Arc;
10 | 
11 | #[derive(derive_new::new, Debug)]
12 | pub struct PhysicalCreateTable {
13 |     pub table: TableReference,
14 |     pub schema: Schema,
15 |     pub if_not_exists: bool,
16 | }
17 | 
18 | impl VolcanoExecutor for PhysicalCreateTable {
19 |     fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> {
20 |         if self.if_not_exists && context.catalog.try_table_heap(&self.table).is_some() {
21 |             return Ok(None);
22 |         }
23 | 
24 |         context
25 |             .catalog
26 |             .create_table(self.table.clone(), Arc::new(self.schema.clone()))?;
27 |         Ok(None)
28 |     }
29 |     fn output_schema(&self) -> SchemaRef {
30 |         Arc::new(self.schema.clone())
31 |     }
32 | }
33 | 
34 | impl std::fmt::Display for PhysicalCreateTable {
35 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
36 |         write!(f, "CreateTable: {}", self.table)
37 |     }
38 | }
39 | 
--------------------------------------------------------------------------------

/src/recovery/redo.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use crate::buffer::BufferManager;
4 | use crate::error::QuillSQLResult;
5 | use crate::recovery::resource_manager::{
6 |     ensure_default_resource_managers_registered, get_resource_manager, RedoContext,
7 | };
8 | use crate::recovery::wal::codec::WalFrame;
9 | use crate::storage::disk_scheduler::DiskScheduler;
10 | 
11 | pub struct RedoExecutor {
12 |     disk_scheduler: Arc<DiskScheduler>,
13 |     buffer_pool: Option<Arc<BufferManager>>,
14 | }
15 | 
16 | impl RedoExecutor {
17 |     pub fn new(
18 |         disk_scheduler: Arc<DiskScheduler>,
19 |         buffer_pool: Option<Arc<BufferManager>>,
20 |     ) -> Self {
21 |         ensure_default_resource_managers_registered();
22 |         Self {
23 |             disk_scheduler,
24 |             buffer_pool,
25 |         }
26 |     }
27 | 
28 |     pub fn apply(&self, frame: &WalFrame) -> QuillSQLResult<u64> {
29 |         if let Some(manager) = get_resource_manager(frame.rmid) {
30 |             let ctx = RedoContext {
31 |                 disk_scheduler: self.disk_scheduler.clone(),
32 |                 buffer_pool: self.buffer_pool.clone(),
33 |             };
34 |             manager.redo(frame, &ctx)
35 |         } else {
36 |             Ok(0)
37 |         }
38 |     }
39 | }
40 | 
--------------------------------------------------------------------------------
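Note: `RedoExecutor::apply` is a thin dispatcher — look up the resource manager registered for the frame's `rmid` and let it replay the frame, skipping frames with no registered manager. A toy version of that registry pattern (every type here is a stand-in; `WalFrame`, `RedoContext`, and the real resource managers are not shown in this dump):

```rust
use std::collections::HashMap;

// Stand-in for a decoded WAL frame.
struct Frame {
    rmid: u8,
    payload: Vec<u8>,
}

trait ResourceManager {
    // Replay one frame; the returned number is a stand-in for whatever
    // progress metric the real redo path reports.
    fn redo(&self, frame: &Frame) -> Result<u64, String>;
}

struct HeapRm;
impl ResourceManager for HeapRm {
    fn redo(&self, frame: &Frame) -> Result<u64, String> {
        Ok(frame.payload.len() as u64) // pretend we re-applied the page bytes
    }
}

struct RedoExecutorSketch {
    managers: HashMap<u8, Box<dyn ResourceManager>>,
}

impl RedoExecutorSketch {
    fn apply(&self, frame: &Frame) -> Result<u64, String> {
        match self.managers.get(&frame.rmid) {
            Some(rm) => rm.redo(frame), // known rmid: delegate
            None => Ok(0),              // unknown rmid: no-op, like Ok(0) above
        }
    }
}
```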
/src/expression/alias.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::Schema;
2 | use crate::catalog::{Column, DataType};
3 | use crate::error::QuillSQLResult;
4 | use crate::expression::{Expr, ExprTrait};
5 | use crate::storage::tuple::Tuple;
6 | use crate::utils::scalar::ScalarValue;
7 | 
8 | #[derive(Clone, PartialEq, Eq, Debug)]
9 | pub struct Alias {
10 |     pub expr: Box<Expr>,
11 |     pub name: String,
12 | }
13 | 
14 | impl ExprTrait for Alias {
15 |     fn data_type(&self, input_schema: &Schema) -> QuillSQLResult<DataType> {
16 |         self.expr.data_type(input_schema)
17 |     }
18 | 
19 |     fn nullable(&self, input_schema: &Schema) -> QuillSQLResult<bool> {
20 |         self.expr.nullable(input_schema)
21 |     }
22 | 
23 |     fn evaluate(&self, tuple: &Tuple) -> QuillSQLResult<ScalarValue> {
24 |         self.expr.evaluate(tuple)
25 |     }
26 | 
27 |     fn to_column(&self, input_schema: &Schema) -> QuillSQLResult<Column> {
28 |         Ok(Column::new(
29 |             self.name.clone(),
30 |             self.data_type(input_schema)?,
31 |             self.nullable(input_schema)?,
32 |         ))
33 |     }
34 | }
35 | 
36 | impl std::fmt::Display for Alias {
37 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
38 |         write!(f, "{} AS {}", self.expr, self.name)
39 |     }
40 | }
41 | 
--------------------------------------------------------------------------------

/src/plan/logical_planner/plan_create_index.rs:
--------------------------------------------------------------------------------
1 | use crate::error::{QuillSQLError, QuillSQLResult};
2 | use crate::plan::logical_plan::{CreateIndex, LogicalPlan};
3 | 
4 | use super::LogicalPlanner;
5 | 
6 | impl<'a> LogicalPlanner<'a> {
7 |     pub fn plan_create_index(
8 |         &self,
9 |         index_name: &sqlparser::ast::ObjectName,
10 |         table_name: &sqlparser::ast::ObjectName,
11 |         columns: &[sqlparser::ast::OrderByExpr],
12 |     ) -> QuillSQLResult<LogicalPlan> {
13 |         let index_name = index_name.0.first().map_or(
14 |             Err(QuillSQLError::Plan(format!(
15 |                 "Index name {index_name} is not expected"
16 |             ))),
17 |             |ident| Ok(ident.value.clone()),
18 |         )?;
19 |         let table = self.bind_table_name(table_name)?;
20 |         let mut columns_expr = vec![];
21 |         for col in columns.iter() {
22 |             let col_expr = self.bind_order_by_expr(col)?;
23 |             columns_expr.push(col_expr);
24 |         }
25 |         let table_schema = self.context.catalog.table_heap(&table)?.schema.clone();
26 |         Ok(LogicalPlan::CreateIndex(CreateIndex {
27 |             index_name,
28 |             table,
29 |             table_schema,
30 |             columns: columns_expr,
31 |         }))
32 |     }
33 | }
34 | 
--------------------------------------------------------------------------------

/src/recovery/wal/codec/clr.rs:
--------------------------------------------------------------------------------
1 | use crate::error::{QuillSQLError, QuillSQLResult};
2 | use crate::recovery::Lsn;
3 | use crate::transaction::TransactionId;
4 | 
5 | #[derive(Debug, Clone)]
6 | pub struct ClrPayload {
7 |     pub txn_id: TransactionId,
8 |     pub undone_lsn: Lsn,
9 |     pub undo_next_lsn: Lsn,
10 | }
11 | 
12 | pub fn encode_clr(body: &ClrPayload) -> Vec<u8> {
13 |     // CLR (rmid=Clr, info=0)
14 |     // body: txn_id(8) + undone_lsn(8) + undo_next_lsn(8)
15 |     let mut buf = Vec::with_capacity(24);
16 |     buf.extend_from_slice(&body.txn_id.to_le_bytes());
17 |     buf.extend_from_slice(&body.undone_lsn.to_le_bytes());
18 |     buf.extend_from_slice(&body.undo_next_lsn.to_le_bytes());
19 |     buf
20 | }
21 | 
22 | pub fn decode_clr(bytes: &[u8]) -> QuillSQLResult<ClrPayload> {
23 |     if bytes.len() != 24 {
24 |         return Err(QuillSQLError::Internal(
25 |             "CLR payload must be 24 bytes".to_string(),
26 |         ));
27 |     }
28 |     let txn_id = u64::from_le_bytes(bytes[0..8].try_into().unwrap()) as TransactionId;
29 |     let undone_lsn = u64::from_le_bytes(bytes[8..16].try_into().unwrap());
30 |     let undo_next_lsn = u64::from_le_bytes(bytes[16..24].try_into().unwrap());
31 |     Ok(ClrPayload {
32 |         txn_id,
33 |         undone_lsn,
34 |         undo_next_lsn,
35 |     })
36 | }
37 | 
--------------------------------------------------------------------------------

/src/execution/physical_plan/analyze.rs:
--------------------------------------------------------------------------------
1 | use std::fmt::Display;
2 | 
3 | use crate::catalog::{SchemaRef, EMPTY_SCHEMA_REF};
4 | use crate::error::QuillSQLResult;
5 | use crate::execution::{ExecutionContext, VolcanoExecutor};
6 | use crate::storage::tuple::Tuple;
7 | use crate::transaction::LockMode;
8 | use crate::utils::table_ref::TableReference;
9 | 
10 | #[derive(Debug)]
11 | pub struct PhysicalAnalyze {
12 |     table: TableReference,
13 | }
14 | 
15 | impl PhysicalAnalyze {
16 |     pub fn new(table: TableReference) -> Self {
17 |         Self { table }
18 |     }
19 | }
20 | 
21 | impl VolcanoExecutor for PhysicalAnalyze {
22 |     fn init(&self, context: &mut ExecutionContext) -> QuillSQLResult<()> {
23 |         context
24 |             .txn_ctx_mut()
25 |             .lock_table(self.table.clone(), LockMode::IntentionShared)?;
26 |         context.catalog.analyze_table(&self.table)?;
27 |         Ok(())
28 |     }
29 | 
30 |     fn next(&self, _context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> {
31 |         Ok(None)
32 |     }
33 | 
34 |     fn output_schema(&self) -> SchemaRef {
35 |         EMPTY_SCHEMA_REF.clone()
36 |     }
37 | }
38 | 
39 | impl Display for PhysicalAnalyze {
40 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
41 |         write!(f, "Analyze {}", self.table)
42 |     }
43 | }
44 | 
--------------------------------------------------------------------------------

/src/plan/logical_planner/plan_explain.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use crate::catalog::{Column, DataType, Schema};
4 | use crate::error::QuillSQLResult;
5 | use crate::plan::logical_plan::{LogicalPlan, Values};
6 | use crate::plan::LogicalPlanner;
7 | use crate::utils::util::pretty_format_logical_plan;
8 | 
9 | impl LogicalPlanner<'_> {
10 |     /// Build a plan that returns the formatted logical plan as rows of text.
11 |     pub fn plan_explain(
12 |         &mut self,
13 |         statement: &sqlparser::ast::Statement,
14 |     ) -> QuillSQLResult<LogicalPlan> {
15 |         let inner_plan = self.plan(statement)?;
16 |         let text = pretty_format_logical_plan(&inner_plan);
17 |         let lines: Vec<Vec<crate::expression::Expr>> = text
18 |             .lines()
19 |             .map(|s| {
20 |                 vec![crate::expression::Expr::Literal(
21 |                     crate::expression::Literal {
22 |                         value: s.to_string().into(),
23 |                     },
24 |                 )]
25 |             })
26 |             .collect();
27 | 
28 |         let schema = Arc::new(Schema::new(vec![Column::new(
29 |             "plan",
30 |             DataType::Varchar(None),
31 |             false,
32 |         )]));
33 |         Ok(LogicalPlan::Values(Values {
34 |             schema,
35 |             values: lines,
36 |         }))
37 |     }
38 | }
39 | 
--------------------------------------------------------------------------------

/src/plan/logical_plan/join.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::SchemaRef;
2 | use crate::expression::Expr;
3 | use crate::plan::logical_plan::LogicalPlan;
4 | use std::sync::Arc;
5 | 
6 | #[derive(derive_new::new, Debug, Clone)]
7 | pub struct Join {
8 |     pub left: Arc<LogicalPlan>,
9 |     pub right: Arc<LogicalPlan>,
10 |     pub join_type: JoinType,
11 |     pub condition: Option<Expr>,
12 |     pub schema: SchemaRef,
13 | }
14 | 
15 | #[derive(Debug, Clone, Copy, PartialEq, Eq)]
16 | pub enum JoinType {
17 |     // select * from x inner join y on ...
18 |     Inner,
19 |     // select * from x left (outer) join y on ...
20 |     LeftOuter,
21 |     // select * from x right (outer) join y on ...
22 |     RightOuter,
23 |     // select * from x full (outer) join y on ...
24 |     FullOuter,
25 |     // select * from x, y
26 |     // select * from x cross join y
27 |     Cross,
28 | }
29 | 
30 | impl std::fmt::Display for Join {
31 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
32 |         write!(f, "{} Join", self.join_type)?;
33 |         if let Some(condition) = self.condition.as_ref() {
34 |             write!(f, ": On {condition}")?;
35 |         }
36 |         Ok(())
37 |     }
38 | }
39 | 
40 | impl std::fmt::Display for JoinType {
41 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
42 |         write!(f, "{self:?}")
43 |     }
44 | }
45 | 
--------------------------------------------------------------------------------

/src/expression/cast.rs:
--------------------------------------------------------------------------------
1 | use crate::catalog::{Column, DataType, Schema};
2 | use crate::error::{QuillSQLError, QuillSQLResult};
3 | use crate::expression::{Expr, ExprTrait};
4 | use crate::storage::tuple::Tuple;
5 | use crate::utils::scalar::ScalarValue;
6 | 
7 | /// Cast expression
8 | #[derive(Clone, PartialEq, Eq, Debug)]
9 | pub struct Cast {
10 |     /// The expression being cast
11 |     pub expr: Box<Expr>,
12 |     /// The `DataType` the expression will yield
13 |     pub data_type: DataType,
14 | }
15 | 
16 | impl ExprTrait for Cast {
17 |     fn data_type(&self, _input_schema: &Schema) -> QuillSQLResult<DataType> {
18 |         Ok(self.data_type)
19 |     }
20 | 
21 |     fn nullable(&self, input_schema: &Schema) -> QuillSQLResult<bool> {
22 |         self.expr.nullable(input_schema)
23 |     }
24 | 
25 |     fn evaluate(&self, tuple: &Tuple) -> QuillSQLResult<ScalarValue> {
26 |         let value = self.expr.evaluate(tuple)?;
27 |         value.cast_to(&self.data_type)
28 |     }
29 | 
30 |     fn to_column(&self, _input_schema: &Schema) -> QuillSQLResult<Column> {
31 |         Err(QuillSQLError::Plan(format!(
32 |             "expr {:?} as column not supported",
33 |             self
34 |         )))
35 |     }
36 | }
37 | 
38 | impl std::fmt::Display for Cast {
39 |     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40 |         write!(f, "CAST {} AS {}", self.expr, self.data_type)
41 |     }
42 | }
43 | 
--------------------------------------------------------------------------------
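Note: `Cast::evaluate` is evaluate-then-convert — compute the inner expression, then coerce the scalar via `cast_to`. A rough standalone illustration of what `value.cast_to(&data_type)` semantics look like, using a two-variant scalar (the real `ScalarValue` has many more cases, so this is an assumption-laden sketch, not crate code):

```rust
#[derive(Debug, Clone, PartialEq)]
enum Scalar {
    Int64(Option<i64>),      // None plays the role of SQL NULL
    Varchar(Option<String>),
}

#[derive(Debug, Clone, Copy)]
enum DataType {
    Int64,
    Varchar,
}

// Minimal cast: NULLs stay NULL, ints render to strings, strings parse to ints.
fn cast_to(value: &Scalar, target: DataType) -> Result<Scalar, String> {
    match (value, target) {
        (Scalar::Int64(v), DataType::Varchar) => {
            Ok(Scalar::Varchar(v.map(|x| x.to_string())))
        }
        (Scalar::Varchar(v), DataType::Int64) => match v {
            None => Ok(Scalar::Int64(None)),
            Some(s) => s
                .parse::<i64>()
                .map(|x| Scalar::Int64(Some(x)))
                .map_err(|e| format!("cast error: {e}")),
        },
        // Identity casts.
        (Scalar::Int64(v), DataType::Int64) => Ok(Scalar::Int64(*v)),
        (Scalar::Varchar(v), DataType::Varchar) => Ok(Scalar::Varchar(v.clone())),
    }
}
```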
'default' : 'dark'; 20 | mermaid.initialize({ startOnLoad: true, theme }); 21 | 22 | // Simplest way to make mermaid re-render the diagrams in the new theme is via refreshing the page 23 | 24 | for (const darkTheme of darkThemes) { 25 | document.getElementById(darkTheme).addEventListener('click', () => { 26 | if (lastThemeWasLight) { 27 | window.location.reload(); 28 | } 29 | }); 30 | } 31 | 32 | for (const lightTheme of lightThemes) { 33 | document.getElementById(lightTheme).addEventListener('click', () => { 34 | if (!lastThemeWasLight) { 35 | window.location.reload(); 36 | } 37 | }); 38 | } 39 | })(); 40 | -------------------------------------------------------------------------------- /src/function/aggregate/mod.rs: -------------------------------------------------------------------------------- 1 | mod avg; 2 | mod count; 3 | 4 | pub use avg::AvgAccumulator; 5 | pub use count::CountAccumulator; 6 | use std::fmt::Debug; 7 | 8 | use crate::error::QuillSQLResult; 9 | use crate::utils::scalar::ScalarValue; 10 | use strum::{EnumIter, IntoEnumIterator}; 11 | 12 | #[derive(Clone, PartialEq, Eq, Debug, EnumIter)] 13 | pub enum AggregateFunctionKind { 14 | Count, 15 | Avg, 16 | } 17 | 18 | impl AggregateFunctionKind { 19 | pub fn create_accumulator(&self) -> Box<dyn Accumulator> { 20 | match self { 21 | AggregateFunctionKind::Count => Box::new(CountAccumulator::new()), 22 | AggregateFunctionKind::Avg => Box::new(AvgAccumulator::new()), 23 | } 24 | } 25 | 26 | pub fn find(name: &str) -> Option<Self> { 27 | AggregateFunctionKind::iter().find(|kind| kind.to_string().eq_ignore_ascii_case(name)) 28 | } 29 | } 30 | 31 | impl std::fmt::Display for AggregateFunctionKind { 32 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 33 | write!(f, "{self:?}") 34 | } 35 | } 36 | 37 | pub trait Accumulator: Send + Sync + Debug { 38 | /// Updates the accumulator's state from its input. 39 | fn update_value(&mut self, value: &ScalarValue) -> QuillSQLResult<()>; 40 | 41 | /// Returns the final aggregate value, consuming the internal state. 42 | fn evaluate(&self) -> QuillSQLResult<ScalarValue>; 43 | } 44 | -------------------------------------------------------------------------------- /src/execution/physical_plan/filter.rs: -------------------------------------------------------------------------------- 1 | use log::debug; 2 | use std::sync::Arc; 3 | 4 | use crate::catalog::SchemaRef; 5 | use crate::expression::Expr; 6 | use crate::{ 7 | error::QuillSQLResult, 8 | execution::{ExecutionContext, VolcanoExecutor}, 9 | storage::tuple::Tuple, 10 | }; 11 | 12 | use super::PhysicalPlan; 13 | 14 | #[derive(derive_new::new, Debug)] 15 | pub struct PhysicalFilter { 16 | pub predicate: Expr, 17 | pub input: Arc<PhysicalPlan>, 18 | } 19 | 20 | impl VolcanoExecutor for PhysicalFilter { 21 | fn init(&self, context: &mut ExecutionContext) -> QuillSQLResult<()> { 22 | debug!("init filter executor"); 23 | self.input.init(context) 24 | } 25 | 26 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 27 | loop { 28 | if let Some(tuple) = self.input.next(context)? { 29 | if context.eval_predicate(&self.predicate, &tuple)?
{ 30 | return Ok(Some(tuple)); 31 | } 32 | } else { 33 | return Ok(None); 34 | } 35 | } 36 | } 37 | 38 | fn output_schema(&self) -> SchemaRef { 39 | self.input.output_schema() 40 | } 41 | } 42 | 43 | impl std::fmt::Display for PhysicalFilter { 44 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 45 | write!(f, "Filter: {}", self.predicate) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/execution/physical_plan/project.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::catalog::SchemaRef; 4 | use crate::expression::Expr; 5 | use crate::{ 6 | error::QuillSQLResult, 7 | execution::{ExecutionContext, VolcanoExecutor}, 8 | storage::tuple::Tuple, 9 | }; 10 | 11 | use super::PhysicalPlan; 12 | 13 | #[derive(derive_new::new, Debug)] 14 | pub struct PhysicalProject { 15 | pub exprs: Vec<Expr>, 16 | pub schema: SchemaRef, 17 | pub input: Arc<PhysicalPlan>, 18 | } 19 | 20 | impl VolcanoExecutor for PhysicalProject { 21 | fn init(&self, context: &mut ExecutionContext) -> QuillSQLResult<()> { 22 | self.input.init(context) 23 | } 24 | 25 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 26 | if let Some(tuple) = self.input.next(context)? { 27 | let mut new_values = Vec::with_capacity(self.exprs.len()); 28 | for expr in &self.exprs { 29 | new_values.push(context.eval_expr(expr, &tuple)?); 30 | } 31 | Ok(Some(Tuple::new(self.output_schema(), new_values))) 32 | } else { 33 | Ok(None) 34 | } 35 | } 36 | 37 | fn output_schema(&self) -> SchemaRef { 38 | self.schema.clone() 39 | } 40 | } 41 | 42 | impl std::fmt::Display for PhysicalProject { 43 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 44 | write!(f, "Project") 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/plan/logical_planner/plan_delete.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{QuillSQLError, QuillSQLResult}; 2 | use crate::plan::logical_plan::{Delete, LogicalPlan}; 3 | use crate::plan::LogicalPlanner; 4 | 5 | impl<'a> LogicalPlanner<'a> { 6 | pub fn plan_delete( 7 | &self, 8 | table: &sqlparser::ast::TableWithJoins, 9 | selection: &Option<sqlparser::ast::Expr>, 10 | ) -> QuillSQLResult<LogicalPlan> { 11 | if !table.joins.is_empty() { 12 | return Err(QuillSQLError::Plan( 13 | "DELETE with joins is not supported".to_string(), 14 | )); 15 | } 16 | 17 | let table_ref = match &table.relation { 18 | sqlparser::ast::TableFactor::Table { name, ..
} => self.bind_table_name(name)?, 19 | _ => { 20 | return Err(QuillSQLError::Plan(format!( 21 | "Table {} is not supported in DELETE", 22 | table 23 | ))) 24 | } 25 | }; 26 | 27 | let table_heap = self.context.catalog.table_heap(&table_ref)?; 28 | let table_schema = table_heap.schema.clone(); 29 | 30 | let predicate = match selection { 31 | Some(expr) => Some(self.bind_expr(expr)?), 32 | None => None, 33 | }; 34 | 35 | Ok(LogicalPlan::Delete(Delete { 36 | table: table_ref, 37 | table_schema, 38 | selection: predicate, 39 | })) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/plan/logical_plan/sort.rs: -------------------------------------------------------------------------------- 1 | use crate::expression::Expr; 2 | use crate::plan::logical_plan::LogicalPlan; 3 | use std::sync::Arc; 4 | 5 | #[derive(derive_new::new, Debug, Clone)] 6 | pub struct Sort { 7 | pub order_by: Vec<OrderByExpr>, 8 | pub input: Arc<LogicalPlan>, 9 | pub limit: Option<usize>, 10 | } 11 | 12 | #[derive(Clone, PartialEq, Eq, Debug)] 13 | pub struct OrderByExpr { 14 | /// The expression to sort on 15 | pub expr: Box<Expr>, 16 | /// The direction of the sort 17 | pub asc: bool, 18 | /// Whether to put Nulls before all other data values 19 | pub nulls_first: bool, 20 | } 21 | 22 | impl std::fmt::Display for OrderByExpr { 23 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 24 | write!( 25 | f, 26 | "{} {} {}", 27 | self.expr, 28 | if self.asc { "ASC" } else { "DESC" }, 29 | if self.nulls_first { 30 | "NULLS FIRST" 31 | } else { 32 | "NULLS LAST" 33 | } 34 | ) 35 | } 36 | } 37 | 38 | impl std::fmt::Display for Sort { 39 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 40 | write!( 41 | f, 42 | "Sort: {}", 43 | self.order_by 44 | .iter() 45 | .map(|e| format!("{e}")) 46 | .collect::<Vec<_>>() 47 | .join(", ") 48 | ) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/function/aggregate/avg.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::DataType; 2 | use crate::error::{QuillSQLError, QuillSQLResult}; 3 | use crate::function::aggregate::Accumulator; 4 | use crate::utils::scalar::ScalarValue; 5 | 6 | #[derive(Debug, Clone)] 7 | pub struct AvgAccumulator { 8 | sum: Option<f64>, 9 | count: u64, 10 | } 11 | 12 | impl AvgAccumulator { 13 | pub fn new() -> Self { 14 | Self { 15 | sum: None, 16 | count: 0, 17 | } 18 | } 19 | } 20 | 21 | impl Accumulator for AvgAccumulator { 22 | fn update_value(&mut self, value: &ScalarValue) -> QuillSQLResult<()> { 23 | if !value.is_null() { 24 | let value = match value.cast_to(&DataType::Float64)?
{ 25 | ScalarValue::Float64(Some(v)) => v, 26 | _ => { 27 | return Err(QuillSQLError::Internal(format!( 28 | "Failed to cast value {} to float64", 29 | value 30 | ))) 31 | } 32 | }; 33 | 34 | match self.sum { 35 | Some(sum) => self.sum = Some(sum + value), 36 | None => self.sum = Some(value), 37 | } 38 | self.count += 1; 39 | } 40 | Ok(()) 41 | } 42 | 43 | fn evaluate(&self) -> QuillSQLResult<ScalarValue> { 44 | Ok(ScalarValue::Float64( 45 | self.sum.map(|f| f / self.count as f64), 46 | )) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/execution/physical_plan/empty.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::SchemaRef; 2 | use crate::execution::{ExecutionContext, VolcanoExecutor}; 3 | use crate::{error::QuillSQLResult, storage::tuple::Tuple}; 4 | use std::sync::atomic::{AtomicUsize, Ordering}; 5 | 6 | #[derive(Debug)] 7 | pub struct PhysicalEmpty { 8 | pub produce_row_count: usize, 9 | pub schema: SchemaRef, 10 | outputted_count: AtomicUsize, 11 | } 12 | 13 | impl PhysicalEmpty { 14 | pub fn new(produce_row_count: usize, schema: SchemaRef) -> Self { 15 | Self { 16 | produce_row_count, 17 | schema, 18 | outputted_count: AtomicUsize::new(0), 19 | } 20 | } 21 | } 22 | 23 | impl VolcanoExecutor for PhysicalEmpty { 24 | fn init(&self, _context: &mut ExecutionContext) -> QuillSQLResult<()> { 25 | self.outputted_count.store(0, Ordering::SeqCst); 26 | Ok(()) 27 | } 28 | fn next(&self, _context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 29 | if self.outputted_count.fetch_add(1, Ordering::SeqCst) < self.produce_row_count { 30 | Ok(Some(Tuple::new(self.schema.clone(), vec![]))) 31 | } else { 32 | Ok(None) 33 | } 34 | } 35 | 36 | fn output_schema(&self) -> SchemaRef { 37 | self.schema.clone() 38 | } 39 | } 40 | 41 | impl std::fmt::Display for PhysicalEmpty { 42 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 43 | write!(f, "Empty") 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/transaction/lock_guard.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::storage::page::RecordId; 4 | use crate::transaction::{LockManager, TransactionId}; 5 | use crate::utils::table_ref::TableReference; 6 | 7 | pub struct RowLockGuard { 8 | manager: Arc<LockManager>, 9 | txn_id: TransactionId, 10 | table: TableReference, 11 | rid: RecordId, 12 | released: bool, 13 | } 14 | 15 | impl RowLockGuard { 16 | pub fn new( 17 | manager: Arc<LockManager>, 18 | txn_id: TransactionId, 19 | table: TableReference, 20 | rid: RecordId, 21 | ) -> Self { 22 | Self { 23 | manager, 24 | txn_id, 25 | table, 26 | rid, 27 | released: false, 28 | } 29 | } 30 | 31 | pub fn release(mut self) { 32 | self.do_release(); 33 | } 34 | 35 | fn do_release(&mut self) { 36 | if !self.released { 37 | let _ = self 38 | .manager 39 | .unlock_row_raw(self.txn_id, self.table.clone(), self.rid); 40 | self.released = true; 41 | } 42 | } 43 | } 44 | 45 | impl Drop for RowLockGuard { 46 | fn drop(&mut self) { 47 | self.do_release(); 48 | } 49 | } 50 | 51 | pub enum TxnReadGuard { 52 | Temporary(RowLockGuard), 53 | } 54 | 55 | impl TxnReadGuard { 56 | pub fn release(self) { 57 | match self { 58 | TxnReadGuard::Temporary(guard) => guard.release(), 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /docs/src/SUMMARY.md:
-------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | - [Introduction](./introduction.md) 4 | - [Overall Architecture](./architecture.md) 5 | - [Module Overview](./modules/overview.md) 6 | 7 | --- 8 | 9 | - [Contributor's Guide](./contributing.md) 10 | 11 | --- 12 | 13 | - [SQL Front-End](./modules/sql.md) 14 | - [Catalog](./modules/catalog.md) 15 | - [Expression System](./modules/expression.md) 16 | - [Query Plan](./modules/plan.md) 17 | - [The Lifecycle of a Query](./plan/lifecycle.md) 18 | - [Query Optimizer](./modules/optimizer.md) 19 | - [Rule-Based Optimization](./optimizer/rules.md) 20 | - [Execution Engine](./modules/execution.md) 21 | - [The Volcano Model](./execution/volcano.md) 22 | - [Transaction Manager](./modules/transaction.md) 23 | - [MVCC and 2PL](./transaction/mvcc_and_2pl.md) 24 | - [Storage Engine](./modules/storage.md) 25 | - [Disk I/O](./storage/disk_io.md) 26 | - [Page & Tuple Layout](./storage/page_layouts.md) 27 | - [Table Heap & MVCC](./storage/table_heap.md) 28 | - [Buffer Manager](./modules/buffer.md) 29 | - [Page & Page Guards](./buffer/page.md) 30 | - [The Buffer Pool](./buffer/buffer_pool.md) 31 | - [Indexes](./modules/index.md) 32 | - [B+Tree](./index/btree_index.md) 33 | - [Recovery Manager (WAL)](./modules/recovery.md) 34 | - [The ARIES Protocol](./recovery/aries.md) 35 | - [Write-Ahead Logging](./recovery/wal.md) 36 | - [Background Services](./modules/background.md) 37 | - [Configuration](./modules/config.md) 38 | - [Front-Ends (CLI / HTTP)](./modules/bin.md) 39 | - [Testing & Documentation](./modules/tests.md) 40 | -------------------------------------------------------------------------------- /src/recovery/wal/io.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | use std::sync::Arc; 3 | 4 | use bytes::Bytes; 5 | 6 | use crate::error::QuillSQLResult; 7 | use crate::storage::disk_scheduler::{DiskCommandResultReceiver, DiskScheduler}; 8 | 9 | pub type WalIoTicket = DiskCommandResultReceiver<()>; 10 | 11 | pub trait WalSink: Send + Sync { 12 | fn schedule_write( 13 | &self, 14 | path: PathBuf, 15 | offset: u64, 16 | data: Bytes, 17 | sync: bool, 18 | ) -> QuillSQLResult<Option<WalIoTicket>>; 19 | 20 | fn schedule_fsync(&self, path: PathBuf) -> QuillSQLResult<Option<WalIoTicket>>; 21 | } 22 | 23 | #[derive(Clone)] 24 | pub struct DiskSchedulerWalSink { 25 | scheduler: Arc<DiskScheduler>, 26 | } 27 | 28 | impl DiskSchedulerWalSink { 29 | pub fn new(scheduler: Arc<DiskScheduler>) -> Self { 30 | Self { scheduler } 31 | } 32 | } 33 | 34 | impl WalSink for DiskSchedulerWalSink { 35 | fn schedule_write( 36 | &self, 37 | path: PathBuf, 38 | offset: u64, 39 | data: Bytes, 40 | sync: bool, 41 | ) -> QuillSQLResult<Option<WalIoTicket>> { 42 | if data.is_empty() && !sync { 43 | return Ok(None); 44 | } 45 | let receiver = self 46 | .scheduler 47 | .schedule_wal_write(path, offset, data, sync)?; 48 | Ok(Some(receiver)) 49 | } 50 | 51 | fn schedule_fsync(&self, path: PathBuf) -> QuillSQLResult<Option<WalIoTicket>> { 52 | let receiver = self.scheduler.schedule_wal_fsync(path)?; 53 | Ok(Some(receiver)) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /docs/src/modules/config.md: -------------------------------------------------------------------------------- 1 | # Configuration & Runtime Options 2 | 3 | `src/config/` centralizes tunables used by `DatabaseOptions`, the CLI/HTTP front-ends, and 4 | background workers.
Keeping knobs in one place makes it easy to demonstrate how WAL, 5 | buffering, or vacuum behavior changes under different settings. 6 | 7 | --- 8 | 9 | ## Key Types 10 | 11 | | Type | Description | 12 | | ---- | ----------- | 13 | | `DatabaseOptions` | Top-level options when constructing a database (WAL config, default isolation, etc.). | 14 | | `WalOptions` | WAL directory, segment size, flush strategy, writer interval, sync mode. | 15 | | `IndexVacuumConfig` / `MvccVacuumConfig` | Background worker intervals (buffer writer, MVCC vacuum). | 16 | | `BufferPoolConfig` | Optional overrides for pool size, TinyLFU, and replacement policy details. | 17 | 18 | --- 19 | 20 | ## Usage 21 | 22 | - CLI/HTTP front-ends parse env vars or config files into `DatabaseOptions` and pass them 23 | to `Database::new_*`. 24 | - During `bootstrap_storage`, the database wires these options into `WalManager`, 25 | `DiskScheduler`, and `BackgroundWorkers`. 26 | - Workers and execution components receive `Arc` references to the relevant configs so 27 | they can adapt at runtime without global state. 28 | 29 | --- 30 | 31 | ## Teaching Ideas 32 | 33 | - Toggle `WalOptions::synchronous_commit` to discuss commit latency vs durability. 34 | - Shrink the buffer pool to highlight eviction behavior under different replacement 35 | policies. 36 | - Adjust `MvccVacuumConfig` intervals and measure how vacuum frequency affects foreground 37 | write throughput. 38 | -------------------------------------------------------------------------------- /src/recovery/wal/codec/txn.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{QuillSQLError, QuillSQLResult}; 2 | use crate::transaction::TransactionId; 3 | 4 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 5 | #[repr(u8)] 6 | pub enum TransactionRecordKind { 7 | Begin = 1, 8 | Commit = 2, 9 | Abort = 3, 10 | } 11 | 12 | impl TransactionRecordKind { 13 | pub fn from_u8(value: u8) -> QuillSQLResult<Self> { 14 | match value { 15 | 1 => Ok(TransactionRecordKind::Begin), 16 | 2 => Ok(TransactionRecordKind::Commit), 17 | 3 => Ok(TransactionRecordKind::Abort), 18 | other => Err(QuillSQLError::Internal(format!( 19 | "Unknown transaction record kind: {}", 20 | other 21 | ))), 22 | } 23 | } 24 | } 25 | 26 | #[derive(Debug, Clone)] 27 | pub struct TransactionPayload { 28 | pub marker: TransactionRecordKind, 29 | pub txn_id: TransactionId, 30 | } 31 | 32 | pub fn encode_transaction(body: &TransactionPayload) -> (u8, Vec<u8>) { 33 | let mut buf = Vec::with_capacity(8); 34 | buf.extend_from_slice(&body.txn_id.to_le_bytes()); 35 | (body.marker as u8, buf) 36 | } 37 | 38 | pub fn decode_transaction(bytes: &[u8], info: u8) -> QuillSQLResult<TransactionPayload> { 39 | if bytes.len() != 8 { 40 | return Err(QuillSQLError::Internal( 41 | "Transaction payload must be 8 bytes".to_string(), 42 | )); 43 | } 44 | let txn_id = u64::from_le_bytes(bytes[0..8].try_into().unwrap()) as TransactionId; 45 | let marker = TransactionRecordKind::from_u8(info)?; 46 | Ok(TransactionPayload { marker, txn_id }) 47 | } 48 | -------------------------------------------------------------------------------- /docs/src/modules/bin.md: -------------------------------------------------------------------------------- 1 | # Front-Ends (CLI / HTTP) 2 | 3 | The `bin/` directory contains the user-facing entry points. Both binaries embed the same 4 | `Database` type, so they demonstrate how the core engine can power different UIs.
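To make "same engine, different UIs" concrete, here is a minimal embedding sketch. The constructor name and the row type below are assumptions (the docs only promise a `Database::new_*` family and a `run(sql)` entry point), so treat this as an illustration rather than the crate's confirmed API:

```rust
// Hedged sketch: how a front-end might embed the engine.
use quill_sql::database::Database; // module path assumed

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // `new_on_disk` is a hypothetical stand-in for the `Database::new_*` family.
    let mut db = Database::new_on_disk("quill.db")?;
    // Both the CLI and the HTTP server funnel user input through this one call.
    for row in db.run("SELECT 1")? {
        println!("{row:?}");
    }
    Ok(())
}
```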
5 | 6 | | Binary | Purpose | 7 | | ------ | ------- | 8 | | `client.rs` | Interactive CLI (REPL) that reads SQL, executes it, and prints tabular output. | 9 | | `server.rs` | HTTP + JSON API for integration tests or web UIs. | 10 | 11 | --- 12 | 13 | ## CLI (`bin/client.rs`) 14 | 15 | - Uses `rustyline` to provide history, multi-line editing, and familiar shell shortcuts. 16 | - Each command calls `database.run(sql)` and formats the resulting `Vec<Tuple>`. 17 | - Supports meta commands (e.g., `.tables`) that expose catalog metadata—great for 18 | teaching how logical objects map to physical handles. 19 | 20 | ## HTTP (`bin/server.rs`) 21 | 22 | - Built with `axum`/`hyper` (depending on the current `Cargo.toml`), exposing endpoints such as: 23 | - `POST /query` – run arbitrary SQL and return rows or an error payload. 24 | - Health/metrics endpoints—which you can extend in labs to surface background worker 25 | status or buffer metrics. 26 | - Configuration comes from `QUILL_DB_FILE`, `QUILL_HTTP_ADDR`, `PORT`, etc., mirroring 27 | how production services inject settings. 28 | 29 | --- 30 | 31 | ## Teaching Ideas 32 | 33 | - Extend the CLI with `\describe table` to practice catalog lookups. 34 | - Add transaction endpoints (BEGIN/COMMIT) to the HTTP server to demonstrate how 35 | `SessionContext` scopes transactions per connection. 36 | - Combine CLI interaction with `RUST_LOG` tracing to walk through the entire query 37 | lifecycle. 38 | -------------------------------------------------------------------------------- /src/recovery/wal/codec/page.rs: -------------------------------------------------------------------------------- 1 | use crate::buffer::PageId; 2 | use crate::error::{QuillSQLError, QuillSQLResult}; 3 | use crate::recovery::Lsn; 4 | 5 | #[derive(Debug, Clone)] 6 | pub struct PageWritePayload { 7 | pub page_id: PageId, 8 | pub prev_page_lsn: Lsn, 9 | pub page_image: Vec<u8>, 10 | } 11 | 12 | pub fn encode_page_write(body: &PageWritePayload) -> Vec<u8> { 13 | // Page/PageWrite (rmid=Page, info=0) 14 | // body: page_id(4) + prev_page_lsn(8) + image_len(4) + page_image[] 15 | let mut buf = Vec::with_capacity(4 + 8 + 4 + body.page_image.len()); 16 | buf.extend_from_slice(&body.page_id.to_le_bytes()); 17 | buf.extend_from_slice(&body.prev_page_lsn.to_le_bytes()); 18 | buf.extend_from_slice(&(body.page_image.len() as u32).to_le_bytes()); 19 | buf.extend_from_slice(&body.page_image); 20 | buf 21 | } 22 | 23 | pub fn decode_page_write(bytes: &[u8]) -> QuillSQLResult<PageWritePayload> { 24 | if bytes.len() < 4 + 8 + 4 { 25 | return Err(QuillSQLError::Internal( 26 | "PageWrite payload too short".to_string(), 27 | )); 28 | } 29 | let page_id = u32::from_le_bytes(bytes[0..4].try_into().unwrap()) as PageId; 30 | let prev_page_lsn = u64::from_le_bytes(bytes[4..12].try_into().unwrap()) as Lsn; 31 | let image_len = u32::from_le_bytes(bytes[12..16].try_into().unwrap()) as usize; 32 | if bytes.len() != 16 + image_len { 33 | return Err(QuillSQLError::Internal( 34 | "PageWrite payload length mismatch".to_string(), 35 | )); 36 | } 37 | let page_image = bytes[16..].to_vec(); 38 | Ok(PageWritePayload { 39 | page_id, 40 | prev_page_lsn, 41 | page_image, 42 | }) 43 | } 44 | -------------------------------------------------------------------------------- /docs/src/modules/tests.md: -------------------------------------------------------------------------------- 1 | # Testing & Documentation 2 | 3 | QuillSQL is intended for teaching, so the repo invests heavily in examples and automated 4 | verification.
The `tests/` tree and this mdBook work together to illustrate every module. 5 | 6 | --- 7 | 8 | ## Test Suite 9 | 10 | | Location | Purpose | 11 | | -------- | ------- | 12 | | `tests/sql_example/*.slt` | [sqllogictest](https://www.sqlite.org/sqllogictest.html) suites covering DDL, DML, transactions, and indexes. | 13 | | `tests/transaction_tests.rs` | Rust unit tests that stress MVCC visibility, lock conflicts, and isolation semantics. | 14 | | `tests/storage_*` | Component tests for heap/index/buffer internals—perfect references for lab exercises. | 15 | 16 | Common commands: 17 | 18 | ```bash 19 | cargo test -q 20 | # focused run 21 | cargo test tests::transaction_tests::repeatable_read_sees_consistent_snapshot_after_update -- --nocapture 22 | ``` 23 | 24 | For long-running suites, wrap with `timeout` to guard against hangs. 25 | 26 | --- 27 | 28 | ## Documentation (mdBook) 29 | 30 | - The `docs/` directory is an mdBook; run `mdbook serve docs` to browse locally. 31 | - Each module, including this page, has a dedicated chapter so instructors can teach 32 | subsystem by subsystem. 33 | - Anchor chapters such as `architecture.md`, `transactions.md`, and `wal.md` walk through 34 | end-to-end flows and subsystem deep dives. 35 | 36 | --- 37 | 38 | ## Teaching Ideas 39 | 40 | - Require sqllogictest additions alongside code changes to reinforce “tests as docs”. 41 | - Use the mdBook site during lectures to connect diagrams with source files. 42 | - Assign “doc walk-through” tasks where students extend diagrams or add experiment 43 | instructions to existing chapters. 44 | -------------------------------------------------------------------------------- /src/plan/logical_planner/plan_insert.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use std::sync::Arc; 3 | 4 | use crate::plan::logical_plan::{Insert, LogicalPlan, Values}; 5 | 6 | use super::LogicalPlanner; 7 | 8 | impl<'a> LogicalPlanner<'a> { 9 | pub fn plan_insert( 10 | &self, 11 | table_name: &sqlparser::ast::ObjectName, 12 | columns_ident: &Vec<sqlparser::ast::Ident>, 13 | source: &sqlparser::ast::Query, 14 | ) -> QuillSQLResult<LogicalPlan> { 15 | let mut input = self.plan_set_expr(source.body.as_ref())?; 16 | let table = self.bind_table_name(table_name)?; 17 | let table_schema = self.context.catalog.table_heap(&table)?.schema.clone(); 18 | 19 | let projected_schema = if columns_ident.is_empty() { 20 | table_schema.clone() 21 | } else { 22 | let columns: Vec<String> = columns_ident 23 | .iter() 24 | .map(|ident| ident.value.clone()) 25 | .collect(); 26 | let indices = columns 27 | .iter() 28 | .map(|name| table_schema.index_of(Some(&table), name.as_str())) 29 | .collect::<QuillSQLResult<Vec<usize>>>()?; 30 | 31 | Arc::new(table_schema.project(&indices)?) 32 | }; 33 | 34 | if let LogicalPlan::Values(Values { values, ..
}) = input { 35 | input = LogicalPlan::Values(Values { 36 | values, 37 | schema: projected_schema.clone(), 38 | }) 39 | } 40 | 41 | Ok(LogicalPlan::Insert(Insert { 42 | table, 43 | table_schema, 44 | projected_schema, 45 | input: Arc::new(input), 46 | })) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/recovery/wal/writer.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicBool, Ordering}; 2 | use std::sync::{Arc, Weak}; 3 | use std::thread; 4 | use std::time::Duration; 5 | 6 | use crate::error::{QuillSQLError, QuillSQLResult}; 7 | 8 | use super::WalManager; 9 | 10 | #[derive(Debug)] 11 | pub(super) struct WalWriterRuntime { 12 | stop_flag: Arc<AtomicBool>, 13 | thread: thread::JoinHandle<()>, 14 | } 15 | 16 | impl WalWriterRuntime { 17 | pub(super) fn spawn(target: Weak<WalManager>, interval: Duration) -> QuillSQLResult<Self> { 18 | let stop_flag = Arc::new(AtomicBool::new(false)); 19 | let thread_stop = stop_flag.clone(); 20 | let handle = thread::Builder::new() 21 | .name("walwriter".into()) 22 | .spawn(move || { 23 | while !thread_stop.load(Ordering::Relaxed) { 24 | if let Some(manager) = target.upgrade() { 25 | let _ = manager.flush(None); 26 | } else { 27 | break; 28 | } 29 | thread::sleep(interval); 30 | } 31 | if let Some(manager) = target.upgrade() { 32 | let _ = manager.flush(None); 33 | } 34 | }) 35 | .map_err(|e| QuillSQLError::Internal(format!("Failed to spawn walwriter: {}", e)))?; 36 | Ok(Self { 37 | stop_flag, 38 | thread: handle, 39 | }) 40 | } 41 | 42 | pub(super) fn stop(self) -> QuillSQLResult<()> { 43 | self.stop_flag.store(true, Ordering::Release); 44 | self.thread 45 | .join() 46 | .map_err(|_| QuillSQLError::Internal("walwriter thread panicked".to_string())) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/storage/page/freelist_page.rs: -------------------------------------------------------------------------------- 1 | use crate::buffer::{PageId, INVALID_PAGE_ID, PAGE_SIZE}; 2 | use crate::storage::codec::{CommonCodec, FreelistPageHeaderCodec}; 3 | use std::sync::LazyLock; 4 | 5 | static EMPTY_FREELIST_PAGE_HEADER: FreelistPageHeader = FreelistPageHeader { 6 | next_page_id: 0, 7 | current_size: 0, 8 | max_size: 0, 9 | }; 10 | 11 | pub static FREELIST_PAGE_MAX_SIZE: LazyLock<usize> = LazyLock::new(|| { 12 | (PAGE_SIZE - FreelistPageHeaderCodec::encode(&EMPTY_FREELIST_PAGE_HEADER).len()) 13 | / CommonCodec::encode_u32(INVALID_PAGE_ID).len() 14 | }); 15 | 16 | #[derive(Debug, Eq, PartialEq)] 17 | pub struct FreelistPage { 18 | pub header: FreelistPageHeader, 19 | pub array: Vec<PageId>, 20 | } 21 | 22 | #[derive(Debug, Eq, PartialEq)] 23 | pub struct FreelistPageHeader { 24 | pub next_page_id: PageId, 25 | pub current_size: u32, 26 | pub max_size: u32, 27 | } 28 | 29 | impl FreelistPage { 30 | pub fn new() -> Self { 31 | Self { 32 | header: FreelistPageHeader { 33 | next_page_id: INVALID_PAGE_ID, 34 | current_size: 0, 35 | max_size: *FREELIST_PAGE_MAX_SIZE as u32, 36 | }, 37 | array: vec![], 38 | } 39 | } 40 | 41 | pub fn is_full(&self) -> bool { 42 | self.header.current_size >= self.header.max_size 43 | } 44 | 45 | pub fn push(&mut self, page_id: PageId) { 46 | self.array.push(page_id); 47 | self.header.current_size += 1; 48 | } 49 | 50 | pub fn pop(&mut self) -> Option<PageId> { 51 | let page_id = self.array.pop(); 52 | if page_id.is_some() { 53 | self.header.current_size -= 1; 54 | } 55 | page_id 56 | } 57 | } 58 |
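The contract above is small: `push` records a reusable page and bumps `current_size`, `pop` hands back the most recently freed page (LIFO), and `is_full` tells the allocator when to chain another freelist page through `next_page_id`. A standalone sketch exercising `FreelistPage` exactly as defined above (the import path is an assumption based on this repository's module tree):

```rust
use quill_sql::storage::page::FreelistPage; // path assumed from the source tree

fn main() {
    let mut freelist = FreelistPage::new();
    freelist.push(7); // page 7 was freed and is now recorded for reuse
    freelist.push(9); // page 9 freed as well
    assert_eq!(freelist.pop(), Some(9)); // LIFO: the most recently freed page comes back first
    assert_eq!(freelist.header.current_size, 1);
    assert!(!freelist.is_full()); // room left before chaining via next_page_id
}
```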
-------------------------------------------------------------------------------- /docs/src/introduction.md: -------------------------------------------------------------------------------- 1 |
2 | ![QuillSQL Logo](./assets/rust-db.png) 3 |
4 | 5 | # QuillSQL Internals 6 | 7 | Welcome to the technical documentation for QuillSQL. 8 | 9 | This book provides a deep dive into the internal architecture and implementation details of the database. It is intended for developers, contributors, and anyone interested in understanding how a relational database is built from the ground up, referencing concepts from classic database courses like CMU 15-445. 10 | 11 | --- 12 | 13 | ## Table of Contents 14 | 15 | * [**Overall Architecture**](./architecture.md): A high-level overview of the entire system. 16 | 17 | * **Core Modules** 18 | * [**Buffer Manager**](./modules/buffer.md): The in-memory page cache. 19 | * [Page & Page Guards](./buffer/page.md) 20 | * [The Buffer Pool](./buffer/buffer_pool.md) 21 | * [**Storage Engine**](./modules/storage.md): How data is physically stored. 22 | * [Disk I/O](./storage/disk_io.md) 23 | * [Page & Tuple Layout](./storage/page_layouts.md) 24 | * [Table Heap & MVCC](./storage/table_heap.md) 25 | * [**Indexes**](./modules/index.md): The B+Tree implementation. 26 | * [B+Tree Details](./index/btree_index.md) 27 | * [**Recovery Manager (WAL)**](./modules/recovery.md): Crash recovery and the ARIES protocol. 28 | * [**Transaction Manager**](./modules/transaction.md): Concurrency control with MVCC and 2PL. 29 | * [**Query Plan**](./modules/plan.md): The journey from SQL to an executable plan. 30 | * [**Query Optimizer**](./modules/optimizer.md): Rule-based plan transformations. 31 | * [**Execution Engine**](./modules/execution.md): The Volcano (iterator) execution model. 32 | -------------------------------------------------------------------------------- /src/plan/logical_plan/util.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::{ColumnRef, Schema}; 2 | use crate::error::QuillSQLResult; 3 | use crate::expression::{Expr, ExprTrait}; 4 | use crate::plan::logical_plan::JoinType; 5 | use crate::plan::logical_plan::LogicalPlan; 6 | use std::sync::Arc; 7 | 8 | pub fn build_join_schema( 9 | left: &Schema, 10 | right: &Schema, 11 | join_type: JoinType, 12 | ) -> QuillSQLResult<Schema> { 13 | fn nullify_columns(columns: &[ColumnRef]) -> Vec<ColumnRef> { 14 | columns 15 | .iter() 16 | .map(|f| Arc::new(f.as_ref().clone().with_nullable(true))) 17 | .collect() 18 | } 19 | 20 | let left_cols = &left.columns; 21 | let right_cols = &right.columns; 22 | 23 | let columns: Vec<ColumnRef> = match join_type { 24 | JoinType::Inner | JoinType::Cross => { 25 | left_cols.iter().chain(right_cols.iter()).cloned().collect() 26 | } 27 | JoinType::LeftOuter => left_cols 28 | .iter() 29 | .chain(&nullify_columns(right_cols)) 30 | .cloned() 31 | .collect(), 32 | JoinType::RightOuter => nullify_columns(left_cols) 33 | .iter() 34 | .chain(right_cols.iter()) 35 | .cloned() 36 | .collect(), 37 | JoinType::FullOuter => nullify_columns(left_cols) 38 | .iter() 39 | .chain(&nullify_columns(right_cols)) 40 | .cloned() 41 | .collect(), 42 | }; 43 | Ok(Schema { columns }) 44 | } 45 | 46 | pub fn project_schema(input: &LogicalPlan, exprs: &[Expr]) -> QuillSQLResult<Schema> { 47 | let input_schema = &input.schema(); 48 | let mut columns = vec![]; 49 | for expr in exprs { 50 | columns.push(expr.to_column(input_schema)?)
51 | } 52 | Ok(Schema::new(columns)) 53 | } 54 | -------------------------------------------------------------------------------- /src/plan/logical_planner/plan_update.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{QuillSQLError, QuillSQLResult}; 2 | use crate::plan::logical_plan::{LogicalPlan, Update}; 3 | use crate::plan::LogicalPlanner; 4 | use std::collections::HashMap; 5 | 6 | impl<'a> LogicalPlanner<'a> { 7 | pub fn plan_update( 8 | &self, 9 | table: &sqlparser::ast::TableWithJoins, 10 | assignments: &[sqlparser::ast::Assignment], 11 | selection: &Option<sqlparser::ast::Expr>, 12 | ) -> QuillSQLResult<LogicalPlan> { 13 | let table_ref = match &table.relation { 14 | sqlparser::ast::TableFactor::Table { name, .. } => self.bind_table_name(name)?, 15 | _ => { 16 | return Err(QuillSQLError::Plan(format!( 17 | "table {} is not supported", 18 | table 19 | ))) 20 | } 21 | }; 22 | 23 | let table_schema = self.context.catalog.table_heap(&table_ref)?.schema.clone(); 24 | 25 | let mut assignment_map = HashMap::new(); 26 | for assign in assignments { 27 | let column_ident = assign.id.get(0).ok_or(QuillSQLError::Plan(format!( 28 | "Assignment {} is not supported", 29 | assign 30 | )))?; 31 | let column_name = column_ident.value.to_ascii_lowercase(); 32 | let value = self.bind_expr(&assign.value)?; 33 | assignment_map.insert(column_name, value); 34 | } 35 | 36 | let selection = match selection { 37 | Some(e) => Some(self.bind_expr(e)?), 38 | None => None, 39 | }; 40 | 41 | Ok(LogicalPlan::Update(Update { 42 | table: table_ref, 43 | table_schema, 44 | assignments: assignment_map, 45 | selection, 46 | })) 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/execution/physical_plan/create_index.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::{SchemaRef, EMPTY_SCHEMA_REF}; 2 | use crate::error::QuillSQLError; 3 | use crate::expression::{ColumnExpr, Expr}; 4 | use crate::plan::logical_plan::OrderByExpr; 5 | use crate::storage::tuple::Tuple; 6 | use crate::utils::table_ref::TableReference; 7 | use crate::{ 8 | error::QuillSQLResult, 9 | execution::{ExecutionContext, VolcanoExecutor}, 10 | }; 11 | use std::sync::Arc; 12 | 13 | #[derive(Debug, derive_new::new)] 14 | pub struct PhysicalCreateIndex { 15 | pub name: String, 16 | pub table: TableReference, 17 | pub table_schema: SchemaRef, 18 | pub columns: Vec<OrderByExpr>, 19 | } 20 | 21 | impl VolcanoExecutor for PhysicalCreateIndex { 22 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 23 | let mut key_indices = vec![]; 24 | for col in self.columns.iter() { 25 | match col.expr.as_ref() { 26 | Expr::Column(ColumnExpr { name, ..
}) => { 27 | key_indices.push(self.table_schema.index_of(None, name)?); 28 | } 29 | _ => { 30 | return Err(QuillSQLError::Execution(format!( 31 | "The expr should be column instead of {}", 32 | col.expr 33 | ))) 34 | } 35 | } 36 | } 37 | let key_schema = Arc::new(self.table_schema.project(&key_indices)?); 38 | context 39 | .catalog 40 | .create_index(self.name.clone(), &self.table, key_schema)?; 41 | Ok(None) 42 | } 43 | fn output_schema(&self) -> SchemaRef { 44 | EMPTY_SCHEMA_REF.clone() 45 | } 46 | } 47 | 48 | impl std::fmt::Display for PhysicalCreateIndex { 49 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 50 | write!(f, "CreateIndex: {}", self.name) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/recovery/analysis.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use crate::recovery::control_file::ControlFileSnapshot; 3 | use crate::recovery::wal::codec::{ 4 | decode_checkpoint, CheckpointPayload, ResourceManagerId, WalFrame, 5 | }; 6 | use crate::recovery::Lsn; 7 | 8 | #[derive(Debug, Default, Clone)] 9 | pub struct AnalysisResult { 10 | pub start_lsn: Lsn, 11 | pub has_frames: bool, 12 | } 13 | 14 | pub struct AnalysisPass { 15 | latest: Option<(Lsn, CheckpointPayload)>, 16 | snapshot: Option<ControlFileSnapshot>, 17 | has_frames: bool, 18 | } 19 | 20 | impl AnalysisPass { 21 | pub fn new(snapshot: Option<ControlFileSnapshot>) -> Self { 22 | Self { 23 | latest: None, 24 | snapshot, 25 | has_frames: false, 26 | } 27 | } 28 | 29 | pub fn observe(&mut self, frame: &WalFrame) { 30 | self.has_frames = true; 31 | if frame.rmid == ResourceManagerId::Checkpoint { 32 | if let Ok(payload) = decode_checkpoint(&frame.body) { 33 | self.latest = Some((frame.lsn, payload)); 34 | } 35 | } 36 | } 37 | 38 | pub fn finalize(self) -> QuillSQLResult<AnalysisResult> { 39 | let start_lsn = if let Some((checkpoint_lsn, payload)) = &self.latest { 40 | self.snapshot 41 | .map(|snap| snap.checkpoint_redo_start) 42 | .filter(|redo| *redo >= payload.last_lsn && *redo <= *checkpoint_lsn) 43 | .unwrap_or_else(|| { 44 | payload 45 | .dpt 46 | .iter() 47 | .map(|(_, lsn)| *lsn) 48 | .min() 49 | .unwrap_or(payload.last_lsn) 50 | }) 51 | } else { 52 | 0 53 | }; 54 | 55 | Ok(AnalysisResult { 56 | start_lsn, 57 | has_frames: self.has_frames, 58 | }) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/recovery/wal_record.rs: -------------------------------------------------------------------------------- 1 | use crate::recovery::wal::codec; 2 | use crate::recovery::Lsn; 3 | 4 | pub use crate::recovery::wal::codec::{ 5 | decode_checkpoint, decode_clr, decode_frame, decode_page_write, 6 | decode_payload as decode_wal_payload, decode_transaction, encode_frame, 7 | heap_record_kind_to_info, CheckpointPayload, ClrPayload, PageWritePayload, ResourceManagerId, 8 | TransactionPayload, TransactionRecordKind, WalFrame, WAL_CRC_LEN, WAL_HEADER_LEN, WAL_MAGIC, 9 | WAL_VERSION, WAL_VERSION_V1, 10 | }; 11 | 12 | pub use crate::storage::heap::wal_codec::{ 13 | decode_heap_record as decode_heap, encode_heap_record as encode_heap, HeapDeletePayload, 14 | HeapInsertPayload, HeapRecordKind, HeapRecordPayload, RelationIdent, TupleMetaRepr, 15 | }; 16 | pub use crate::storage::index::wal_codec::{ 17 | decode_index_record as decode_index, encode_index_record as encode_index, 18 | IndexInternalEntryPayload, IndexInternalMergePayload, IndexInternalRedistributePayload, 19 |
IndexInternalSplitPayload, IndexLeafDeletePayload, IndexLeafInsertPayload, 20 | IndexLeafMergePayload, IndexLeafRedistributePayload, IndexLeafSplitEntryPayload, 21 | IndexLeafSplitPayload, IndexParentDeletePayload, IndexParentInsertPayload, 22 | IndexParentUpdatePayload, IndexRecordPayload, IndexRelationIdent, IndexRootAdoptPayload, 23 | IndexRootInstallInternalPayload, IndexRootInstallLeafPayload, IndexRootResetPayload, 24 | }; 25 | 26 | #[derive(Debug, Clone)] 27 | pub enum WalRecordPayload { 28 | PageWrite(PageWritePayload), 29 | Transaction(TransactionPayload), 30 | Heap(HeapRecordPayload), 31 | Index(IndexRecordPayload), 32 | Checkpoint(CheckpointPayload), 33 | /// Compensation log record: documents an UNDO action; redo is a no-op. 34 | Clr(ClrPayload), 35 | } 36 | 37 | impl WalRecordPayload { 38 | pub fn encode(&self, lsn: Lsn, prev_lsn: Lsn) -> Vec<u8> { 39 | codec::encode_frame(lsn, prev_lsn, self) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/expression/column.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::Schema; 2 | use crate::catalog::{Column, DataType}; 3 | use crate::error::QuillSQLResult; 4 | use crate::expression::ExprTrait; 5 | use crate::storage::tuple::Tuple; 6 | use crate::utils::scalar::ScalarValue; 7 | use crate::utils::table_ref::TableReference; 8 | 9 | /// A named reference to a qualified field in a schema. 10 | #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] 11 | pub struct ColumnExpr { 12 | /// relation/table reference. 13 | pub relation: Option<TableReference>, 14 | /// field/column name. 15 | pub name: String, 16 | } 17 | 18 | impl ExprTrait for ColumnExpr { 19 | fn data_type(&self, input_schema: &Schema) -> QuillSQLResult<DataType> { 20 | let column = input_schema.column_with_name(self.relation.as_ref(), &self.name)?; 21 | Ok(column.data_type) 22 | } 23 | 24 | fn nullable(&self, input_schema: &Schema) -> QuillSQLResult<bool> { 25 | let column = input_schema.column_with_name(self.relation.as_ref(), &self.name)?; 26 | Ok(column.nullable) 27 | } 28 | 29 | fn evaluate(&self, tuple: &Tuple) -> QuillSQLResult<ScalarValue> { 30 | tuple 31 | .value_by_name(self.relation.as_ref(), &self.name) 32 | .cloned() 33 | } 34 | 35 | fn to_column(&self, input_schema: &Schema) -> QuillSQLResult<Column> { 36 | let column = input_schema.column_with_name(self.relation.as_ref(), &self.name)?; 37 | Ok(Column::new( 38 | self.name.clone(), 39 | self.data_type(input_schema)?, 40 | self.nullable(input_schema)?, 41 | ) 42 | .with_relation(self.relation.clone().or(column.relation.clone()))) 43 | } 44 | } 45 | 46 | impl std::fmt::Display for ColumnExpr { 47 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 48 | if let Some(relation) = self.relation.as_ref() { 49 | write!(f, "{}.", relation)?; 50 | } 51 | write!(f, "{}", self.name) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/execution/physical_plan/values.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{AtomicU32, Ordering}; 2 | 3 | use crate::catalog::SchemaRef; 4 | use crate::expression::Expr; 5 | use crate::storage::tuple::{Tuple, EMPTY_TUPLE}; 6 | use crate::utils::scalar::ScalarValue; 7 | use crate::{ 8 | error::QuillSQLResult, 9 | execution::{ExecutionContext, VolcanoExecutor}, 10 | }; 11 | 12 | #[derive(Debug)] 13 | pub struct PhysicalValues { 14 | pub schema: SchemaRef, 15 | pub rows: Vec<Vec<Expr>>, 16 | 17 | cursor:
AtomicU32, 18 | } 19 | impl PhysicalValues { 20 | pub fn new(schema: SchemaRef, rows: Vec<Vec<Expr>>) -> Self { 21 | PhysicalValues { 22 | schema, 23 | rows, 24 | cursor: AtomicU32::new(0), 25 | } 26 | } 27 | } 28 | impl VolcanoExecutor for PhysicalValues { 29 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 30 | let cursor = self.cursor.fetch_add(1, Ordering::SeqCst) as usize; 31 | if cursor < self.rows.len() { 32 | let values = self.rows[cursor] 33 | .iter() 34 | .map(|e| context.eval_expr(e, &EMPTY_TUPLE)) 35 | .collect::<QuillSQLResult<Vec<ScalarValue>>>()?; 36 | debug_assert_eq!(self.schema.column_count(), values.len()); 37 | 38 | let casted_values = values 39 | .iter() 40 | .zip(self.schema.columns.iter()) 41 | .map(|(val, col)| val.cast_to(&col.data_type)) 42 | .collect::<QuillSQLResult<Vec<ScalarValue>>>()?; 43 | 44 | Ok(Some(Tuple::new(self.output_schema(), casted_values))) 45 | } else { 46 | Ok(None) 47 | } 48 | } 49 | 50 | fn output_schema(&self) -> SchemaRef { 51 | self.schema.clone() 52 | } 53 | } 54 | 55 | impl std::fmt::Display for PhysicalValues { 56 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 57 | write!(f, "Values: rows={}", self.rows.len()) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/utils/bitmap.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, Eq, PartialEq)] 2 | pub struct DynamicBitmap { 3 | map: Vec<u8>, 4 | } 5 | 6 | impl DynamicBitmap { 7 | pub fn new() -> Self { 8 | Self { map: Vec::new() } 9 | } 10 | 11 | pub fn set(&mut self, index: usize, value: bool) { 12 | let byte_idx = index >> 3; // idx / 8 13 | if byte_idx >= self.map.len() { 14 | self.map.extend(vec![0; byte_idx - self.map.len() + 1]) 15 | } 16 | let offset = index & 0b111; // idx % 8 17 | let mut byte = self.map[byte_idx]; 18 | 19 | let curval = (byte >> (7 - offset)) & 1; 20 | let mask = if value { 1 ^ curval } else { curval }; 21 | byte ^= mask << (7 - offset); // Bit flipping 22 | self.map[byte_idx] = byte; 23 | } 24 | 25 | pub fn get(&self, index: usize) -> Option<bool> { 26 | // Bounds check against the number of stored bits (len * 8, i.e. len << 3). 27 | if index >= self.map.len() << 3 { 28 | return None; 29 | } 30 | let byte_idx = index >> 3; // idx / 8 31 | let offset = index & 0b111; // idx % 8 32 | let byte = self.map[byte_idx]; 33 | Some((byte >> (7 - offset)) & 1 == 1) 34 | } 35 | 36 | pub fn to_bytes(&self) -> Vec<u8> { 37 | self.map.clone() 38 | } 39 | 40 | pub fn from_bytes(bytes: &[u8]) -> Self { 41 | Self { 42 | map: bytes.to_vec(), 43 | } 44 | } 45 | } 46 | 47 | #[cfg(test)] 48 | mod tests { 49 | use crate::utils::bitmap::DynamicBitmap; 50 | 51 | #[test] 52 | fn dynamic_bitmap() { 53 | let mut bitmap = DynamicBitmap::new(); 54 | assert_eq!(bitmap.get(0), None); 55 | 56 | bitmap.set(3, true); 57 | assert_eq!(bitmap.map.len(), 1); 58 | 59 | bitmap.set(10, true); 60 | assert_eq!(bitmap.map.len(), 2); 61 | 62 | assert_eq!(bitmap.get(0), Some(false)); 63 | assert_eq!(bitmap.get(3), Some(true)); 64 | assert_eq!(bitmap.get(10), Some(true)); 65 | 66 | let new_bitmap = DynamicBitmap::from_bytes(&bitmap.to_bytes()); 67 | assert_eq!(new_bitmap, bitmap); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/expression/aggregate.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::{Column, DataType, Schema}; 2 | use crate::error::{QuillSQLError, QuillSQLResult}; 3 | use crate::expression::{Expr, ExprTrait}; 4 | use crate::function::AggregateFunctionKind; 5 | use
crate::storage::tuple::Tuple; 6 | use crate::utils::scalar::ScalarValue; 7 | use std::fmt::Debug; 8 | 9 | #[derive(Clone, PartialEq, Eq, Debug)] 10 | pub struct AggregateFunction { 11 | /// the function kind 12 | pub func_kind: AggregateFunctionKind, 13 | /// List of expressions to feed to the functions as arguments 14 | pub args: Vec<Expr>, 15 | /// Whether this is a DISTINCT aggregation or not 16 | pub distinct: bool, 17 | } 18 | 19 | impl ExprTrait for AggregateFunction { 20 | fn data_type(&self, _input_schema: &Schema) -> QuillSQLResult<DataType> { 21 | match self.func_kind { 22 | AggregateFunctionKind::Count => Ok(DataType::Int64), 23 | AggregateFunctionKind::Avg => Ok(DataType::Float64), 24 | } 25 | } 26 | 27 | fn nullable(&self, _input_schema: &Schema) -> QuillSQLResult<bool> { 28 | Ok(true) 29 | } 30 | 31 | fn evaluate(&self, tuple: &Tuple) -> QuillSQLResult<ScalarValue> { 32 | match self.func_kind { 33 | AggregateFunctionKind::Count | AggregateFunctionKind::Avg => { 34 | let expr = self.args.first().ok_or(QuillSQLError::Internal(format!( 35 | "aggregate function {} should have one arg instead of {:?}", 36 | self.func_kind, self.args 37 | )))?; 38 | expr.evaluate(tuple) 39 | } 40 | } 41 | } 42 | 43 | fn to_column(&self, input_schema: &Schema) -> QuillSQLResult<Column> { 44 | Ok(Column::new( 45 | format!("{}", self), 46 | self.data_type(input_schema)?, 47 | self.nullable(input_schema)?, 48 | )) 49 | } 50 | } 51 | 52 | impl std::fmt::Display for AggregateFunction { 53 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 54 | write!(f, "{}", self.func_kind) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/execution/physical_plan/scan.rs: -------------------------------------------------------------------------------- 1 | //! Shared prefetch buffer utilities for scan operators.
2 | 3 | use std::cell::RefCell; 4 | use std::collections::VecDeque; 5 | 6 | use crate::error::QuillSQLResult; 7 | use crate::storage::page::{RecordId, TupleMeta}; 8 | use crate::storage::tuple::Tuple; 9 | 10 | pub type ScanEntry = (RecordId, TupleMeta, Tuple); 11 | 12 | #[derive(Debug)] 13 | pub struct ScanPrefetch { 14 | buffer: RefCell<VecDeque<ScanEntry>>, 15 | batch_size: usize, 16 | } 17 | 18 | impl ScanPrefetch { 19 | pub fn new(batch_size: usize) -> Self { 20 | Self { 21 | buffer: RefCell::new(VecDeque::new()), 22 | batch_size, 23 | } 24 | } 25 | 26 | pub fn pop_front(&self) -> Option<ScanEntry> { 27 | self.buffer.borrow_mut().pop_front() 28 | } 29 | 30 | pub fn clear(&self) { 31 | self.buffer.borrow_mut().clear(); 32 | } 33 | 34 | pub fn refill<F>(&self, mut producer: F) -> QuillSQLResult<bool> 35 | where 36 | F: FnMut(usize, &mut VecDeque<ScanEntry>) -> QuillSQLResult<()>, 37 | { 38 | let mut fetched = VecDeque::with_capacity(self.batch_size); 39 | producer(self.batch_size, &mut fetched)?; 40 | if fetched.is_empty() { 41 | return Ok(false); 42 | } 43 | self.buffer.borrow_mut().extend(fetched); 44 | Ok(true) 45 | } 46 | } 47 | 48 | #[cfg(test)] 49 | mod tests { 50 | use super::*; 51 | 52 | #[test] 53 | fn prefetch_refill_and_pop() { 54 | let prefetch = ScanPrefetch::new(2); 55 | let rid = RecordId::new(1, 0); 56 | let meta = TupleMeta::new(1, 0); 57 | let tuple = Tuple::empty(crate::catalog::EMPTY_SCHEMA_REF.clone()); 58 | let produced = prefetch 59 | .refill(|_, out| { 60 | out.push_back((rid, meta, tuple.clone())); 61 | Ok(()) 62 | }) 63 | .expect("refill should succeed"); 64 | assert!(produced); 65 | assert!(prefetch.pop_front().is_some()); 66 | assert!(prefetch.pop_front().is_none()); 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/sql/parser/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use sqlparser::{ast::Statement, dialect::PostgreSqlDialect, parser::Parser}; 3 | 4 | pub fn parse_sql(sql: &str) -> QuillSQLResult<Vec<Statement>> { 5 | // Lightweight rewrite for unsupported SHOW syntax under Postgres dialect 6 | // Maps to information_schema queries to keep planner/executor simple. 7 | let normalized = sql.trim().trim_end_matches(';').trim(); 8 | let lower = normalized.to_ascii_lowercase(); 9 | 10 | let rewritten = if lower == "show databases" || lower == "show database" { 11 | // List schemas (databases) from information_schema.schemas 12 | Some("select schema from information_schema.schemas".to_string()) 13 | } else if lower == "show tables" { 14 | // List all tables 15 | Some("select table_name from information_schema.tables".to_string()) 16 | } else if lower.starts_with("set transaction") { 17 | let rest = normalized["set transaction".len()..].trim_start(); 18 | Some(format!("SET TRANSACTION {}", rest)) 19 | } else if lower.starts_with("set session transaction") { 20 | let rest = normalized["set session transaction".len()..].trim_start(); 21 | Some(format!( 22 | "SET SESSION CHARACTERISTICS AS TRANSACTION {}", 23 | rest 24 | )) 25 | } else { 26 | None 27 | }; 28 | 29 | let sql_to_parse = rewritten.as_deref().unwrap_or(normalized); 30 | let stmts = Parser::parse_sql(&PostgreSqlDialect {}, sql_to_parse)?; 31 | for stmt in &stmts { 32 | match stmt { 33 | Statement::StartTransaction { .. } 34 | | Statement::Commit { .. } 35 | | Statement::Rollback { .. } 36 | | Statement::SetTransaction { ..
} => {} 37 | _ => {} 38 | } 39 | } 40 | Ok(stmts) 41 | } 42 | 43 | #[cfg(test)] 44 | mod tests { 45 | 46 | #[test] 47 | pub fn test_parser() { 48 | let sql = "select * from (select * from t1)"; 49 | let stmts = super::parse_sql(sql).unwrap(); 50 | println!("{:#?}", stmts[0]); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /docs/src/modules/background.md: -------------------------------------------------------------------------------- 1 | # Background Services 2 | 3 | `src/background/` hosts the asynchronous workers that keep a database healthy: WAL 4 | writers, checkpoints, buffer flushers, and MVCC vacuum. A central registry makes it easy 5 | to start/stop workers together—ideal for teaching how background maintenance supports 6 | foreground queries. 7 | 8 | --- 9 | 10 | ## Responsibilities 11 | 12 | - Start workers according to configuration (`WalOptions`, `MvccVacuumConfig`, etc.). 13 | - Define lightweight traits (`CheckpointWal`, `BufferMaintenance`, `TxnSnapshotOps`) so 14 | workers can run without pulling in an async runtime. 15 | - Provide `BackgroundWorkers`, a registry that tracks `WorkerHandle`s and shuts them down 16 | when `Database` drops. 17 | 18 | --- 19 | 20 | ## Built-in Workers 21 | 22 | | Worker | Trigger | Behavior | 23 | | ------ | ------- | -------- | 24 | | WAL writer | `wal_writer_interval_ms` | Calls `WalManager::background_flush` to durably write log buffers. | 25 | | Checkpoint | `checkpoint_interval_ms` | Captures dirty page / active txn tables and emits `Checkpoint` records to bound recovery. | 26 | | Buffer writer | `bg_writer_interval` | Flushes dirty frames to reduce checkpoint pressure. | 27 | | MVCC vacuum | `MvccVacuumConfig` | Removes obsolete tuple versions once `safe_xmin` advances. | 28 | 29 | Every worker registers itself with `BackgroundWorkers`; `shutdown_all()` ensures threads 30 | exit cleanly during tests or process teardown. 31 | 32 | --- 33 | 34 | ## Interactions 35 | 36 | - **WalManager** – WAL writer and checkpoint workers operate on `Arc`. 37 | - **BufferManager** – background flushers inspect dirty frames and help checkpoints 38 | capture consistent snapshots. 39 | - **TransactionManager** – MVCC vacuum queries `TxnSnapshotOps` for `safe_xmin`. 40 | 41 | --- 42 | 43 | ## Teaching Ideas 44 | 45 | - Tune `MvccVacuumConfig::batch_limit` and chart how quickly old tuple versions disappear. 46 | - Disable a worker in tests to show why unflushed WAL or missing checkpoints lengthen 47 | recovery. 48 | - Enable `RUST_LOG=background=info` to trace how these tasks complement foreground load. 
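All of the built-in workers listed above follow the same stop-flag loop that `WalWriterRuntime` implements in `src/recovery/wal/writer.rs`: spawn a named thread, tick on an interval, and join on shutdown. A minimal, self-contained sketch of that pattern; the `WorkerHandle` here is illustrative and not the crate's actual registry type:

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;
use std::thread;
use std::time::Duration;

// Illustrative handle; the real crate tracks these inside BackgroundWorkers.
struct WorkerHandle {
    stop: Arc<AtomicBool>,
    thread: thread::JoinHandle<()>,
}

impl WorkerHandle {
    fn spawn(interval: Duration, mut tick: impl FnMut() + Send + 'static) -> Self {
        let stop = Arc::new(AtomicBool::new(false));
        let flag = stop.clone();
        let thread = thread::spawn(move || {
            while !flag.load(Ordering::Relaxed) {
                tick(); // e.g. flush WAL, write back dirty pages, vacuum old versions
                thread::sleep(interval);
            }
        });
        Self { stop, thread }
    }

    fn shutdown(self) {
        // Mirrors shutdown_all(): raise the flag, then join the thread.
        self.stop.store(true, Ordering::Release);
        let _ = self.thread.join();
    }
}

fn main() {
    let worker = WorkerHandle::spawn(Duration::from_millis(50), || println!("tick"));
    thread::sleep(Duration::from_millis(200));
    worker.shutdown();
}
```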
49 | -------------------------------------------------------------------------------- /src/execution/physical_plan/limit.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::AtomicUsize; 2 | use std::sync::Arc; 3 | 4 | use crate::catalog::SchemaRef; 5 | use crate::{ 6 | error::QuillSQLResult, 7 | execution::{ExecutionContext, VolcanoExecutor}, 8 | storage::tuple::Tuple, 9 | }; 10 | 11 | use super::PhysicalPlan; 12 | 13 | #[derive(Debug)] 14 | pub struct PhysicalLimit { 15 | pub limit: Option<usize>, 16 | pub offset: usize, 17 | pub input: Arc<PhysicalPlan>, 18 | 19 | cursor: AtomicUsize, 20 | } 21 | impl PhysicalLimit { 22 | pub fn new(limit: Option<usize>, offset: usize, input: Arc<PhysicalPlan>) -> Self { 23 | PhysicalLimit { 24 | limit, 25 | offset, 26 | input, 27 | cursor: AtomicUsize::new(0), 28 | } 29 | } 30 | } 31 | impl VolcanoExecutor for PhysicalLimit { 32 | fn init(&self, context: &mut ExecutionContext) -> QuillSQLResult<()> { 33 | self.input.init(context)?; 34 | self.cursor.store(0, std::sync::atomic::Ordering::SeqCst); 35 | Ok(()) 36 | } 37 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 38 | loop { 39 | let next_tuple = self.input.next(context)?; 40 | if next_tuple.is_none() { 41 | return Ok(None); 42 | } 43 | let cursor = self 44 | .cursor 45 | .fetch_add(1, std::sync::atomic::Ordering::SeqCst); 46 | if cursor < self.offset { 47 | continue; 48 | } 49 | return if let Some(limit) = self.limit { 50 | if cursor < self.offset + limit { 51 | Ok(next_tuple) 52 | } else { 53 | Ok(None) 54 | } 55 | } else { 56 | Ok(next_tuple) 57 | }; 58 | } 59 | } 60 | 61 | fn output_schema(&self) -> SchemaRef { 62 | self.input.output_schema() 63 | } 64 | } 65 | 66 | impl std::fmt::Display for PhysicalLimit { 67 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 68 | write!(f, "Limit") 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/execution/physical_plan/drop_table.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::{SchemaRef, EMPTY_SCHEMA_REF}; 2 | use crate::error::{QuillSQLError, QuillSQLResult}; 3 | use crate::execution::{ExecutionContext, VolcanoExecutor}; 4 | use crate::storage::tuple::Tuple; 5 | use crate::transaction::LockMode; 6 | use crate::utils::table_ref::TableReference; 7 | 8 | #[derive(Debug)] 9 | pub struct PhysicalDropTable { 10 | table: TableReference, 11 | if_exists: bool, 12 | } 13 | 14 | impl PhysicalDropTable { 15 | pub fn new(table: TableReference, if_exists: bool) -> Self { 16 | Self { table, if_exists } 17 | } 18 | 19 | fn qualified_name(&self) -> String { 20 | self.table.to_string() 21 | } 22 | } 23 | 24 | impl VolcanoExecutor for PhysicalDropTable { 25 | fn init(&self, _context: &mut ExecutionContext) -> QuillSQLResult<()> { 26 | Ok(()) 27 | } 28 | 29 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 30 | if context.catalog.try_table_heap(&self.table).is_none() { 31 | if self.if_exists { 32 | return Ok(None); 33 | } 34 | return Err(QuillSQLError::Execution(format!( 35 | "table {} does not exist", 36 | self.qualified_name() 37 | ))); 38 | } 39 | 40 | context 41 | .txn_ctx() 42 | .ensure_writable(&self.table, "DROP TABLE")?; 43 | context 44 | .txn_ctx_mut() 45 | .lock_table(self.table.clone(), LockMode::Exclusive)?; 46 | 47 | let dropped = context.catalog.drop_table(&self.table)?; 48 | if !dropped && !self.if_exists { 49 | return Err(QuillSQLError::Execution(format!( 50 | "table {}
does not exist", 51 | self.qualified_name() 52 | ))); 53 | } 54 | 55 | Ok(None) 56 | } 57 | 58 | fn output_schema(&self) -> SchemaRef { 59 | EMPTY_SCHEMA_REF.clone() 60 | } 61 | } 62 | 63 | impl std::fmt::Display for PhysicalDropTable { 64 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 65 | write!(f, "DropTable: {}", self.table) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /docs/src/buffer/page.md: -------------------------------------------------------------------------------- 1 | # Page & Page Guards 2 | 3 | Before the Buffer Manager can hand out a reference to a page in memory, it must ensure that the page won't be evicted while it's being used by another thread. This is accomplished by **pinning**. 4 | 5 | ## Pinning 6 | 7 | Pinning simply means incrementing a "pin count" associated with the page's frame in the buffer pool. A frame with a pin count greater than zero is forbidden from being chosen as a victim by the page replacer. 8 | 9 | - When a thread wants to use a page, it must first pin it. 10 | - When the thread is finished with the page, it must **unpin** it (decrementing the count). 11 | 12 | Manually managing pin counts is tedious and error-prone. Forgetting to unpin a page leads to a memory leak, as the frame can never be evicted. To solve this, QuillSQL uses a common and powerful C++ and Rust pattern: **Resource Acquisition Is Initialization (RAII)**. 13 | 14 | ## `ReadPageGuard` and `WritePageGuard` 15 | 16 | Instead of returning a raw pointer to the page memory, the `BufferManager`'s `fetch_page_*` methods return a **guard** object: `ReadPageGuard` or `WritePageGuard`. 17 | 18 | These guards are responsible for the lifetime of the pin and the lock on the page: 19 | 20 | 1. **Acquisition**: When a `PageGuard` is created, its constructor acquires the appropriate lock (`RwLock`) on the page's frame and increments the frame's pin count. 21 | - `ReadPageGuard` takes a read lock, allowing multiple concurrent readers. 22 | - `WritePageGuard` takes an exclusive write lock. 23 | 24 | 2. **Usage**: The calling code uses the guard object to access the page's data. The guard provides safe, locked access to the underlying byte array. 25 | 26 | 3. **Release**: When the guard variable goes out of scope (e.g., at the end of a function), its `drop()` method is automatically called by the Rust compiler. This `drop()` implementation handles all the cleanup: 27 | - It decrements the pin count. 28 | - It releases the lock on the frame. 29 | - If it's a `WritePageGuard` and the data was modified, it informs the `BufferManager` that the page is now **dirty**. 30 | 31 | This RAII pattern makes using the buffer pool much safer and more ergonomic, as it makes it impossible to forget to unpin a page or release a lock. 32 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "quill-sql" 4 | version = "0.2.0" 5 | edition = "2021" 6 | description = "A tiny yet serious SQL database in Rust with ARIES-style WAL, 2PL, and B+Tree indexes." 
7 | license = "MIT" 8 | repository = "https://github.com/feichai0017/quillsql" 9 | readme = "README.md" 10 | 11 | [dependencies] 12 | log = "0.4.21" 13 | stack-map = "1.0.5" 14 | ebr = { version = "0.2.13" } 15 | bztree = "0.2.0" 16 | crossbeam-epoch = "0.9.18" 17 | parking_lot = { version = "0.12.1", features = ["deadlock_detection"] } 18 | parking_lot_core = { version = "0.9.0", features = ["deadlock_detection"] } 19 | serial_test = "3.2.0" 20 | dashmap = "6.1.0" 21 | derive-with = "0.6.0" 22 | derive-new = "0.7.0" 23 | comfy-table = "7.1.0" 24 | bincode = "1.3.3" 25 | rand = "0.9.1" 26 | fastrand = "2.0" 27 | thiserror = "2.0.12" 28 | nom = "7.0.0" 29 | logos = "0.15.0" 30 | strum = { version = "0.26", features = ["derive"] } 31 | sqlparser = "0.34.0" 32 | clap = { version = "4.3.19", features = ["derive", "cargo"] } 33 | sqllogictest = "0.13.0" 34 | regex = "1.9.1" 35 | glob = "0.3.1" 36 | rayon = "1.8.0" 37 | string-error = "0.1.0" 38 | memmap2 = "0.9.1" 39 | natord = "1.0.0" 40 | env_logger = "0.11.5" 41 | serde = { version = "1.0", features = ["derive"] } 42 | serde_bytes = "0.11.15" 43 | fs4 = "0.8.4" 44 | tempfile = "3.12.0" 45 | tokio = { version = "1.41.1", features = ["full"] } 46 | tokio-util = { version = "0.7.12", features = ["full"] } 47 | tokio-stream = "0.1.16" 48 | futures = "0.3.31" 49 | bytes = "1.0.0" 50 | rustyline = "15.0.0" 51 | axum = { version = "0.7", features = ["macros", "json"] } 52 | tower = "0.4" 53 | tower-http = { version = "0.5", features = ["fs", "trace", "cors"] } 54 | serde_json = "1.0" 55 | once_cell = "1.19" 56 | io-uring = { version = "0.6", optional = true } 57 | crc32fast = "1.4" 58 | libc = "0.2.176" 59 | crossbeam-channel = "0.5.15" 60 | 61 | [target.'cfg(target_os = "linux")'.dependencies] 62 | io-uring = { version = "0.6" } 63 | 64 | [dev-dependencies] 65 | criterion = { version = "0.5", features = ["html_reports"] } 66 | rand_chacha = "0.9" 67 | rusqlite = { version = "0.31", features = ["bundled"] } 68 | postgres = "0.19" 69 | pprof = { version = "0.15.0", features = ["criterion", "flamegraph"] } 70 | 71 | [[bench]] 72 | name = "storage_bench" 73 | harness = false 74 | -------------------------------------------------------------------------------- /docs/src/modules/expression.md: -------------------------------------------------------------------------------- 1 | # Expression & Scalar Evaluation 2 | 3 | The expression subsystem (`src/expression/`) powers column computations, predicates, and 4 | UPDATE assignments. It keeps expression trees approachable while demonstrating how they 5 | are evaluated during execution. 6 | 7 | --- 8 | 9 | ## Responsibilities 10 | 11 | - Store planner-produced expression trees (`Expr`) in a serializable, traversable enum. 12 | - Bind column references, constants, and built-in functions. 13 | - Evaluate expressions against `Tuple`s at runtime, yielding `ScalarValue`. 14 | - Provide type inference and casting so arithmetic/comparison operators remain well-typed. 15 | 16 | --- 17 | 18 | ## Directory Layout 19 | 20 | | Path | Description | Key Types | 21 | | ---- | ----------- | --------- | 22 | | `mod.rs` | Public API and core enum. | `Expr`, `ExprTrait` | 23 | | `scalar.rs` | Runtime scalar representation + conversions. | `ScalarValue`, `DataType` | 24 | | `binder.rs` | Helpers for the planner/SQL binder. | `BoundExpr` | 25 | 26 | --- 27 | 28 | ## Concepts 29 | 30 | ### Expr Enum 31 | Expresses column refs, literals, comparisons, logical ops, arithmetic, and function 32 | invocations. 
Each variant implements `ExprTrait::evaluate(&self, tuple)` and returns a 33 | `ScalarValue`. 34 | 35 | ### ScalarValue 36 | Unified runtime value across types (int, bigint, bool, decimal, varchar, …). Includes 37 | `cast_to(DataType)` so results can be coerced to the target column type before writes. 38 | 39 | ### Type Inference 40 | Planner code invokes `Expr::data_type(schema)` to predict result types. Execution then 41 | casts when needed—e.g., `UPDATE t SET a = b + 1` uses the column’s declared type for `a`. 42 | 43 | --- 44 | 45 | ## Interactions 46 | 47 | - **Planner** – builds `Expr` trees with bound columns; execution reuses them verbatim. 48 | - **ExecutionContext** – exposes `eval_expr` and `eval_predicate`, wrapping expression 49 | evaluation plus boolean coercion (`NULL` becomes false for predicates). 50 | - **Optimizer** – rules like constant folding traverse `Expr` trees and reuse 51 | `ScalarValue` arithmetic helpers. 52 | 53 | --- 54 | 55 | ## Teaching Ideas 56 | 57 | - Add a simple built-in function (`length(expr)`) to follow the pipeline from parsing to 58 | evaluation. 59 | - Implement short-circuiting or full three-valued boolean logic and validate with 60 | sqllogictest. 61 | - Instrument `Expr::evaluate` with tracing to visualise expression evaluation inside 62 | physical operators. 63 | -------------------------------------------------------------------------------- /src/optimizer/rule/push_down_filter.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use crate::optimizer::logical_optimizer::ApplyOrder; 3 | use crate::optimizer::LogicalOptimizerRule; 4 | use crate::plan::logical_plan::LogicalPlan; 5 | 6 | /// Attach `Filter` predicates directly to the underlying `TableScan`. 7 | /// This lets the physical planner decide whether the scan itself can honor 8 | /// the predicate (e.g. via index) while keeping the logical tree shallower. 
9 | pub struct PushDownFilterToScan; 10 | 11 | impl LogicalOptimizerRule for PushDownFilterToScan { 12 | fn try_optimize(&self, plan: &LogicalPlan) -> QuillSQLResult<Option<LogicalPlan>> { 13 | let LogicalPlan::Filter(filter) = plan else { 14 | return Ok(None); 15 | }; 16 | 17 | match filter.input.as_ref() { 18 | LogicalPlan::TableScan(scan) => { 19 | let mut new_scan = scan.clone(); 20 | new_scan.filters.push(filter.predicate.clone()); 21 | Ok(Some(LogicalPlan::TableScan(new_scan))) 22 | } 23 | _ => Ok(None), 24 | } 25 | } 26 | 27 | fn name(&self) -> &str { 28 | "PushDownFilterToScan" 29 | } 30 | 31 | fn apply_order(&self) -> Option<ApplyOrder> { 32 | Some(ApplyOrder::TopDown) 33 | } 34 | } 35 | 36 | #[cfg(test)] 37 | mod tests { 38 | use crate::database::Database; 39 | use crate::optimizer::rule::PushDownFilterToScan; 40 | use crate::optimizer::LogicalOptimizer; 41 | use crate::plan::logical_plan::LogicalPlan; 42 | use std::sync::Arc; 43 | 44 | fn build_optimizer() -> LogicalOptimizer { 45 | LogicalOptimizer::with_rules(vec![Arc::new(PushDownFilterToScan)]) 46 | } 47 | 48 | #[test] 49 | fn pushes_filter_into_scan() { 50 | let mut db = Database::new_temp().unwrap(); 51 | db.run("create table t1 (a int)").unwrap(); 52 | 53 | let plan = db 54 | .create_logical_plan("select * from t1 where a > 10") 55 | .unwrap(); 56 | let optimized_plan = build_optimizer().optimize(&plan).unwrap(); 57 | 58 | match optimized_plan { 59 | LogicalPlan::Project(project) => match project.input.as_ref() { 60 | LogicalPlan::TableScan(scan) => assert_eq!(scan.filters.len(), 1), 61 | other => panic!("expected TableScan under project, got {other:?}"), 62 | }, 63 | other => panic!("expected Project after pushdown, got {other:?}"), 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /docs/src/modules/execution.md: -------------------------------------------------------------------------------- 1 | # Execution Engine 2 | 3 | `src/execution/` drives `PhysicalPlan` trees using the Volcano (iterator) model. Every 4 | operator pulls tuples from its children, coordinating closely with transactions, 5 | storage, and expression evaluation. 6 | 7 | --- 8 | 9 | ## Core Components 10 | 11 | | Component | Role | 12 | | --------- | ---- | 13 | | `PhysicalPlan` | Enum covering all physical operators; each implements `VolcanoExecutor`. | 14 | | `ExecutionContext` | Shared context carrying the catalog, `TxnContext`, storage engine, and expression helpers. | 15 | | `TupleStream` | Unified scan interface returned by table/index handles. | 16 | 17 | --- 18 | 19 | ## Execution Flow 20 | 21 | 1. `ExecutionEngine::execute` calls `init` on the root plan (and recursively on children). 22 | 2. The engine loops calling `next`, with parents pulling tuples from children. 23 | 3. `ExecutionContext` supplies transaction snapshots, lock helpers, and expression 24 | evaluation per call. 25 | 4. Once `next` returns `None`, the accumulated results are returned to the caller (CLI, 26 | HTTP API, or tests). 27 | 
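To make the pull contract concrete, here is a minimal driver sketch. The `VolcanoExecutor` method signatures match the operators in `src/execution/physical_plan/`; the driver function itself is illustrative (the real `ExecutionEngine` wraps the same loop with result collection and error handling):

```rust
// Illustrative Volcano driver loop; not the real ExecutionEngine.
fn pull_all(
    root: &dyn VolcanoExecutor,
    ctx: &mut ExecutionContext,
) -> QuillSQLResult<Vec<Tuple>> {
    root.init(ctx)?; // operators recursively init their children
    let mut rows = Vec::new();
    while let Some(tuple) = root.next(ctx)? {
        rows.push(tuple); // each next() pulls one tuple up the tree
    }
    Ok(rows) // None from the root signals end of stream
}
```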
28 | --- 29 | 30 | ## Operator Examples 31 | 32 | - **PhysicalSeqScan** – acquires a `table_stream` from the storage engine, uses 33 | `ScanPrefetch` for batching, and relies on `TxnContext::read_visible_tuple` for MVCC. 34 | - **PhysicalIndexScan** – uses `index_stream`, tracks `invisible_hits`, and notifies the 35 | catalog when garbage accumulates. 36 | - **PhysicalUpdate/PhysicalDelete** – call `prepare_row_for_write` to re-validate locks 37 | and the latest tuple before invoking `apply_update/delete`. 38 | - **PhysicalNestedLoopJoin** – showcases the parent/child pull loop and acts as a baseline 39 | for more advanced joins. 40 | 41 | --- 42 | 43 | ## Interactions 44 | 45 | - **StorageEngine** – all data access goes through handles/streams, keeping execution 46 | storage-agnostic. 47 | - **Transaction** – `TxnContext` enforces locking, snapshots, and undo logging; operators 48 | never talk to `LockManager` directly. 49 | - **Expression** – `ExecutionContext::eval_expr` / `eval_predicate` evaluate expressions 50 | built by the planner. 51 | - **Optimizer/Planner** – execution honours the plan as-is; all structural choices happen 52 | upstream. 53 | 54 | --- 55 | 56 | ## Teaching Ideas 57 | 58 | - Implement a new operator (e.g., `PhysicalMergeJoin`) to see how `ExecutionContext` 59 | support generalises. 60 | - Add adaptive prefetching inside `PhysicalSeqScan` to explore iterator hints. 61 | - Enable `RUST_LOG=execution=trace` to watch the `init`/`next` call sequence during a 62 | query. 63 | 64 | --- 65 | 66 | Further reading: [The Volcano Execution Model](../execution/volcano.md) 67 | -------------------------------------------------------------------------------- /src/optimizer/rule/eliminate_limit.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use crate::optimizer::logical_optimizer::ApplyOrder; 3 | use crate::optimizer::LogicalOptimizerRule; 4 | use crate::plan::logical_plan::{EmptyRelation, LogicalPlan}; 5 | 6 | pub struct EliminateLimit; 7 | 8 | impl LogicalOptimizerRule for EliminateLimit { 9 | fn try_optimize(&self, plan: &LogicalPlan) -> QuillSQLResult<Option<LogicalPlan>> { 10 | if let LogicalPlan::Limit(limit) = plan { 11 | match limit.limit { 12 | Some(fetch) => { 13 | if fetch == 0 { 14 | return Ok(Some(LogicalPlan::EmptyRelation(EmptyRelation { 15 | produce_one_row: false, 16 | schema: limit.input.schema().clone(), 17 | }))); 18 | } 19 | } 20 | None => { 21 | if limit.offset == 0 { 22 | let input = limit.input.as_ref(); 23 | // the input may itself be another Limit, so apply the rule again. 
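// e.g. a redundant `OFFSET 0` sitting above another no-op limit, or above
// `LIMIT 0` (which folds to an EmptyRelation): recursing strips the whole
// chain in a single application of the rule.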
24 | return Ok(Some( 25 | self.try_optimize(input)?.unwrap_or_else(|| input.clone()), 26 | )); 27 | } 28 | } 29 | } 30 | } 31 | Ok(None) 32 | } 33 | 34 | fn name(&self) -> &str { 35 | "EliminateLimit" 36 | } 37 | 38 | fn apply_order(&self) -> Option<ApplyOrder> { 39 | Some(ApplyOrder::BottomUp) 40 | } 41 | } 42 | 43 | #[cfg(test)] 44 | mod tests { 45 | use crate::database::Database; 46 | use crate::optimizer::rule::EliminateLimit; 47 | use crate::optimizer::LogicalOptimizer; 48 | use crate::plan::logical_plan::LogicalPlan; 49 | use std::sync::Arc; 50 | 51 | fn build_optimizer() -> LogicalOptimizer { 52 | LogicalOptimizer::with_rules(vec![Arc::new(EliminateLimit)]) 53 | } 54 | 55 | #[test] 56 | fn eliminate_limit() { 57 | let mut db = Database::new_temp().unwrap(); 58 | db.run("create table t1 (a int)").unwrap(); 59 | 60 | let plan = db.create_logical_plan("select a from t1 limit 0").unwrap(); 61 | let optimized_plan = build_optimizer().optimize(&plan).unwrap(); 62 | assert!(matches!(optimized_plan, LogicalPlan::EmptyRelation(_))); 63 | 64 | let plan = db.create_logical_plan("select a from t1 offset 0").unwrap(); 65 | let optimized_plan = build_optimizer().optimize(&plan).unwrap(); 66 | if let LogicalPlan::Project(p) = optimized_plan { 67 | assert!(matches!(p.input.as_ref(), LogicalPlan::TableScan(_))); 68 | } else { 69 | panic!("the first node should be project"); 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /src/execution/physical_plan/drop_index.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::{SchemaRef, EMPTY_SCHEMA_REF}; 2 | use crate::error::{QuillSQLError, QuillSQLResult}; 3 | use crate::execution::{ExecutionContext, VolcanoExecutor}; 4 | use crate::storage::tuple::Tuple; 5 | use crate::transaction::LockMode; 6 | 7 | #[derive(Debug)] 8 | pub struct PhysicalDropIndex { 9 | pub name: String, 10 | pub schema: Option<String>, 11 | pub catalog: Option<String>, 12 | pub if_exists: bool, 13 | } 14 | 15 | impl PhysicalDropIndex { 16 | pub fn new( 17 | name: String, 18 | schema: Option<String>, 19 | catalog: Option<String>, 20 | if_exists: bool, 21 | ) -> Self { 22 | Self { 23 | name, 24 | schema, 25 | catalog, 26 | if_exists, 27 | } 28 | } 29 | 30 | fn qualified_name(&self) -> String { 31 | match (&self.catalog, &self.schema) { 32 | (Some(catalog), Some(schema)) => format!("{catalog}.{schema}.{}", self.name), 33 | (None, Some(schema)) => format!("{schema}.{}", self.name), 34 | _ => self.name.clone(), 35 | } 36 | } 37 | } 38 | 39 | impl VolcanoExecutor for PhysicalDropIndex { 40 | fn init(&self, _context: &mut ExecutionContext) -> QuillSQLResult<()> { 41 | Ok(()) 42 | } 43 | 44 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 45 | let owner = context.catalog.find_index_owner( 46 | self.catalog.as_deref(), 47 | self.schema.as_deref(), 48 | &self.name, 49 | ); 50 | 51 | let Some(table_ref) = owner else { 52 | if self.if_exists { 53 | return Ok(None); 54 | } 55 | return Err(QuillSQLError::Execution(format!( 56 | "index {} does not exist", 57 | self.qualified_name() 58 | ))); 59 | }; 60 | 61 | context 62 | .txn_ctx() 63 | .ensure_writable(&table_ref, "DROP INDEX")?; 64 | context 65 | .txn_ctx_mut() 66 | .lock_table(table_ref.clone(), LockMode::Exclusive)?; 67 | 68 | let dropped = context.catalog.drop_index(&table_ref, &self.name)?; 69 | if !dropped && !self.if_exists { 70 | return Err(QuillSQLError::Execution(format!( 71 | "index {} does not exist", 72 | self.qualified_name() 73 | ))); 74 | } 75 | 
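// DDL work happens entirely in this single next() call; yielding no tuple
// (None) tells the Volcano driver the statement has completed.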
76 | Ok(None) 77 | } 78 | 79 | fn output_schema(&self) -> SchemaRef { 80 | EMPTY_SCHEMA_REF.clone() 81 | } 82 | } 83 | 84 | impl std::fmt::Display for PhysicalDropIndex { 85 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 86 | write!(f, "DropIndex: {}", self.qualified_name()) 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/optimizer/rule/push_down_limit.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use crate::optimizer::logical_optimizer::ApplyOrder; 3 | use crate::optimizer::LogicalOptimizerRule; 4 | use crate::plan::logical_plan::{LogicalPlan, Sort}; 5 | 6 | pub struct PushDownLimit; 7 | 8 | impl LogicalOptimizerRule for PushDownLimit { 9 | fn try_optimize(&self, plan: &LogicalPlan) -> QuillSQLResult<Option<LogicalPlan>> { 10 | let LogicalPlan::Limit(limit) = plan else { 11 | return Ok(None); 12 | }; 13 | 14 | let Some(limit_value) = limit.limit else { 15 | return Ok(None); 16 | }; 17 | 18 | match limit.input.as_ref() { 19 | LogicalPlan::Sort(sort) => { 20 | let new_limit = { 21 | let sort_limit = limit.offset + limit_value; 22 | Some(sort.limit.map(|f| f.min(sort_limit)).unwrap_or(sort_limit)) 23 | }; 24 | if new_limit == sort.limit { 25 | Ok(None) 26 | } else { 27 | let new_sort = LogicalPlan::Sort(Sort { 28 | order_by: sort.order_by.clone(), 29 | input: sort.input.clone(), 30 | limit: new_limit, 31 | }); 32 | plan.with_new_inputs(&[new_sort]).map(Some) 33 | } 34 | } 35 | _ => Ok(None), 36 | } 37 | } 38 | 39 | fn name(&self) -> &str { 40 | "PushDownLimit" 41 | } 42 | 43 | fn apply_order(&self) -> Option<ApplyOrder> { 44 | Some(ApplyOrder::TopDown) 45 | } 46 | } 47 | 48 | #[cfg(test)] 49 | mod tests { 50 | use crate::database::Database; 51 | use crate::optimizer::rule::PushDownLimit; 52 | use crate::optimizer::LogicalOptimizer; 53 | use crate::plan::logical_plan::{LogicalPlan, Sort}; 54 | use std::sync::Arc; 55 | 56 | fn build_optimizer() -> LogicalOptimizer { 57 | LogicalOptimizer::with_rules(vec![Arc::new(PushDownLimit)]) 58 | } 59 | 60 | #[test] 61 | fn push_down_limit() { 62 | let mut db = Database::new_temp().unwrap(); 63 | db.run("create table t1 (a int)").unwrap(); 64 | 65 | let plan = db 66 | .create_logical_plan("select a from t1 order by a limit 10") 67 | .unwrap(); 68 | let optimized_plan = build_optimizer().optimize(&plan).unwrap(); 69 | 70 | if let LogicalPlan::Limit(limit) = optimized_plan { 71 | if let LogicalPlan::Sort(Sort { limit, .. }) = limit.input.as_ref() { 72 | assert_eq!(limit, &Some(10)); 73 | } else { 74 | panic!("the second node should be limit"); 75 | } 76 | } else { 77 | panic!("the first node should be limit"); 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /docs/src/modules/sql.md: -------------------------------------------------------------------------------- 1 | # SQL Front-End 2 | 3 | The SQL front-end lives in `src/sql/`. It turns raw UTF-8 query text into the abstract 4 | syntax trees (ASTs) consumed by planning, while layering Quill-specific name handling 5 | and diagnostics on top of [`sqlparser`](https://docs.rs/sqlparser). 6 | 7 | --- 8 | 9 | ## Responsibilities 10 | 11 | - Parse SQL text into `sqlparser::ast::Statement` values. 12 | - Record precise spans so error messages can highlight the exact byte range. 13 | - Normalise identifiers (case folding, quoted names, multi-part paths). 14 | - Provide helper traits so the logical planner can lower AST nodes without duplicating 15 | syntax checks. 16 | 17 | --- 18 | 19 | ## Directory Layout 20 | 21 | | Path | Purpose | Key Types | 22 | | ---- | ------- | --------- | 23 | | `lexer.rs` | Token helpers that preserve offsets. | `Token`, `TokenExt` | 24 | | `parser.rs` | Single entry point used across the codebase. | `parse_sql`, `SqlInput` | 25 | | `ast/mod.rs` | Planner-facing helpers. | `NormalizedIdent`, `ObjectNameExt` | 26 | | `error.rs` | Span-aware parser errors. | `SqlError`, `SqlSpan` | 27 | 28 | --- 29 | 30 | ## Parsing Pipeline 31 | 32 | 1. **Lexing** – wrap sqlparser’s lexer so every token keeps start/end offsets. 33 | 2. **AST generation** – invoke sqlparser to produce standard `Statement` structs. 34 | 3. **Normalisation** – convert identifiers into `NormalizedIdent`, deal with schema 35 | qualifiers, and build pieces of `TableReference`. 36 | 4. **Planner bridge** – traits like `ColumnRefExt` expose methods such as `relation()` or 37 | `column()` so `LogicalPlanner` can treat different SQL syntaxes uniformly. 38 | 
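Byte-offset spans are what make caret diagnostics cheap to render. A minimal sketch of the idea (the helper below is illustrative; only the notion of slicing the original SQL by `SqlSpan` byte offsets comes from this module):

```rust
// Illustrative caret rendering from byte offsets (assumes single-line SQL).
fn render_error(sql: &str, start: usize, end: usize, msg: &str) -> String {
    let width = end.saturating_sub(start).max(1);
    let caret = " ".repeat(start) + &"^".repeat(width);
    format!("{msg}\n{sql}\n{caret}")
}

// render_error("SELECT * FORM t", 9, 13, "expected FROM") prints:
//   expected FROM
//   SELECT * FORM t
//            ^^^^
```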
39 | --- 40 | 41 | ## Interactions 42 | 43 | - **Logical planner** consumes the AST directly and relies on helper traits from this 44 | module to convert identifiers into catalog references. 45 | - **Database / Session** catch `SqlError` values, so both CLI and HTTP front-ends show 46 | consistent caret diagnostics. 47 | - **Tests** (`tests/sql_example/*.slt`, `tests/sql_parser.rs`) assert on parser output and 48 | error strings to keep teaching feedback stable. 49 | 50 | --- 51 | 52 | ## Implementation Notes 53 | 54 | - `SqlSpan` stores byte offsets, which makes it trivial to slice the original SQL and 55 | render highlighted errors. 56 | - Extended statements (e.g., `EXPLAIN`, `BEGIN TRANSACTION`) show how to layer 57 | Quill-specific syntax without forking sqlparser entirely. 58 | - We avoid desugaring at this stage so students can trace SQL → AST → logical plan step 59 | by step. 60 | 61 | --- 62 | 63 | ## Teaching Ideas 64 | 65 | - Add a new statement (`CREATE VIEW`, `ALTER TABLE ...`) and follow the AST through the 66 | pipeline. 67 | - Improve error hints (“Did you forget FROM?”) to see how better diagnostics aid users. 68 | - Write fuzz tests that round-trip SQL → AST → SQL to discuss parser determinism. 69 | -------------------------------------------------------------------------------- /docs/src/modules/buffer.md: -------------------------------------------------------------------------------- 1 | # Buffer Manager 2 | 3 | The buffer manager (`src/buffer/`) implements QuillSQL’s shared buffer pool, bridging the 4 | speed gap between RAM and disk. It lets storage/execution read and write pages safely 5 | while coordinating with WAL and asynchronous I/O. 6 | 7 | --- 8 | 9 | ## Responsibilities 10 | 11 | - Maintain a fixed-size set of page frames caching `TableHeap` and B+Tree pages. 12 | - Expose RAII-style guards (pin/unpin) that enforce safe concurrent access. 13 | - Keep the page table, replacement policy, dirty-page tracking, and WAL coordination in 14 | sync. 15 | - Submit async I/O through `DiskScheduler`. 16 | 17 | --- 18 | 19 | ## Directory Layout 20 | 21 | | Path | Description | Key Types | 22 | | ---- | ----------- | --------- | 23 | | `buffer_manager.rs` | Core buffer pool. | `BufferManager`, `BufferFrame` | 24 | | `page.rs` | Guard types and pin/unpin logic. | `ReadPageGuard`, `WritePageGuard` | 25 | | `replacer.rs` | LRU-K + TinyLFU replacement. 
| `Replacer` | 26 | | `metrics.rs` | Optional instrumentation hooks. | `BufferMetrics` | 27 | 28 | --- 29 | 30 | ## Key Mechanisms 31 | 32 | ### Guard Model 33 | - `ReadPageGuard`, `WritePageGuard`, and `UpgradeableGuard` ensure only compatible access 34 | modes coexist on a page. 35 | - Guards drop automatically to release pins; paired with Rust’s borrow checker, they make 36 | latch semantics tangible. 37 | 38 | ### Replacement Policy 39 | - **LRU-K** tracks the last K touches to protect hot pages from scan pollution. 40 | - **TinyLFU** decides whether a new page should enter the cache, offering probabilistic 41 | admission against noisy workloads. 42 | 43 | ### WAL Coordination 44 | - Before flushing a dirty page, the buffer checks `page_lsn` and asks `WalManager` to 45 | flush up to that LSN (write-ahead rule). 46 | - `set_wal_manager` wires the buffer to WAL so checkpoints can inspect the oldest dirty 47 | LSN. 48 | 49 | ### Disk Scheduler 50 | - All physical reads/writes go through `DiskScheduler::submit_*`, sharing worker threads 51 | with WAL and demonstrating the benefits of a unified I/O layer. 52 | 53 | --- 54 | 55 | ## Interactions 56 | 57 | - **Storage engine** – `TableHeap` and `BPlusTreeIndex` access pages exclusively through 58 | the buffer manager. 59 | - **Recovery** – checkpoints consult the buffer’s dirty page table to build the ARIES DPT. 60 | - **Background writer** – periodically walks `dirty_frames` to flush pages in the 61 | background. 62 | 63 | --- 64 | 65 | ## Teaching Ideas 66 | 67 | - Disable TinyLFU via feature flag, rerun sqllogictest, and compare hit rates. 68 | - Swap the replacement policy with CLOCK to experiment with cache algorithms. 69 | - Enable `RUST_LOG=buffer=debug` and trace the pin/unpin lifecycle of hot pages. 70 | 71 | --- 72 | 73 | Further reading: [Page & Page Guards](../buffer/page.md), 74 | [The Buffer Pool](../buffer/buffer_pool.md) 75 | -------------------------------------------------------------------------------- /docs/src/modules/plan.md: -------------------------------------------------------------------------------- 1 | # Query Planner Module 2 | 3 | `src/plan/` bridges parsed SQL and executable operators. It converts the AST into a 4 | logical plan, applies rewrites (via the optimizer), and finally emits a physical plan 5 | (`PhysicalPlan`) that the Volcano engine can run. 6 | 7 | --- 8 | 9 | ## Responsibilities 10 | 11 | 1. **LogicalPlanner** – walks the AST, binds table/column names using `PlannerContext`, 12 | performs type checking, and builds a `LogicalPlan` tree. 13 | 2. **PlannerContext** – exposes catalog lookups plus scope information for CTEs, subqueries, 14 | and aliases. 15 | 3. **PhysicalPlanner** – lowers an optimized `LogicalPlan` into a tree of Volcano operators. 16 | 17 | --- 18 | 19 | ## Directory Layout 20 | 21 | | Path | Description | Key Types | 22 | | ---- | ----------- | --------- | 23 | | `logical_plan.rs` | Logical algebra nodes. | `LogicalPlan`, `LogicalExpr`, `JoinType` | 24 | | `logical_planner.rs` | AST → logical transformation. | `LogicalPlanner` | 25 | | `physical_plan.rs` | `PhysicalPlan` enum definition. | `PhysicalPlan`, `Physical*` structs | 26 | | `physical_planner.rs` | Logical → physical lowering. | `PhysicalPlanner` | 27 | | `planner_context.rs` | Catalog/scope abstraction. | `PlannerContext` | 28 | 29 | --- 30 | 31 | ## Workflow 32 | 33 | 1. **Name binding** – `LogicalPlanner` resolves table + column references, creates 34 | `TableReference`s, and validates schemas via the catalog. 35 | 2. 
**Logical tree** – each SQL clause becomes a logical node (FROM → `SeqScan`, WHERE → 36 | `Filter`, GROUP BY → `Aggregate`, etc.). 37 | 3. **Physical selection** – `PhysicalPlanner` picks concrete algorithms (sequential scan, 38 | index scan, nested-loop join, sort, limit …). Because every physical node implements 39 | `VolcanoExecutor`, the execution engine can pull tuples immediately. 40 | 41 | --- 42 | 43 | ## Interactions 44 | 45 | - **SQL front-end** – provides the AST; helper traits (`NormalizedIdent`, etc.) keep name 46 | resolution consistent. 47 | - **Catalog** – `PlannerContext` relies on it to confirm table/index existence and fetch 48 | schemas. 49 | - **Optimizer** – operates purely on `LogicalPlan`; the planner must emit clean, 50 | traversable trees so rules can fire. 51 | - **Execution** – physical nodes carry `TableReference`, `SchemaRef`, and hints that the 52 | execution engine passes to the storage layer. 53 | 54 | --- 55 | 56 | ## Teaching Ideas 57 | 58 | - Implement a new logical operator (e.g., `LogicalDistinct`) and add the corresponding 59 | physical operator to trace the full lifecycle. 60 | - Experiment with early projection inside the logical plan and observe its impact on 61 | downstream operators. 62 | - Use `pretty_format_logical_plan`/`physical_plan` dumps to visualise rewrites before and 63 | after optimizer passes. 64 | 65 | --- 66 | 67 | Further reading: [The Lifecycle of a Query](../plan/lifecycle.md) 68 | -------------------------------------------------------------------------------- /docs/src/modules/optimizer.md: -------------------------------------------------------------------------------- 1 | # Optimizer Module 2 | 3 | `src/optimizer/` contains a lightweight, teaching-friendly rule engine. It rewrites 4 | `LogicalPlan` trees into cheaper equivalents without requiring a full cost-based 5 | framework. 6 | 7 | --- 8 | 9 | ## Responsibilities 10 | 11 | - Define the `OptimizerRule` trait (“match → rewrite”). 12 | - Ship built-in rules such as predicate pushdown, projection pruning, and limit pushdown. 13 | - Provide a pipeline (`LogicalOptimizer`) that repeatedly applies rules until reaching a 14 | fixpoint, while remaining extensible for future cost models. 15 | 16 | --- 17 | 18 | ## Directory Layout 19 | 20 | | Path | Description | Key Types | 21 | | ---- | ----------- | --------- | 22 | | `mod.rs` | Optimizer entry point. | `LogicalOptimizer` | 23 | | `rule.rs` | Trait + shared helpers. | `OptimizerRule` | 24 | | `rules/*` | Concrete rewrites. | `PushDownFilter`, `PushDownLimit`, … | 25 | 26 | --- 27 | 28 | ## How It Works 29 | 30 | 1. `LogicalOptimizer::optimize(plan)` iterates through the registered rule list. 31 | 2. Each rule implements `fn apply(&LogicalPlan) -> Option<LogicalPlan>`. Returning `Some` 32 | means the rule fired; the pipeline restarts to reach a fixpoint. 33 | 3. Rules are pure functions, which keeps them easy to unit test and reason about. 34 | 35 | Examples: 36 | - **PushDownFilter** moves filters below scans/joins to reduce input size sooner. 37 | - **PushDownLimit** applies LIMIT before expensive joins/sorts when safe. 38 | - **PruneProjection** removes unused columns so execution/storage decode less data. 39 | 
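In the tree itself the trait is spelled `LogicalOptimizerRule` with a fallible `try_optimize` (see `src/optimizer/rule/`); a do-nothing rule shows the shape every rule follows. `MyRule` here is purely illustrative:

```rust
use crate::error::QuillSQLResult;
use crate::optimizer::logical_optimizer::ApplyOrder;
use crate::optimizer::LogicalOptimizerRule;
use crate::plan::logical_plan::LogicalPlan;

pub struct MyRule;

impl LogicalOptimizerRule for MyRule {
    fn try_optimize(&self, plan: &LogicalPlan) -> QuillSQLResult<Option<LogicalPlan>> {
        // Inspect `plan` and return Ok(Some(rewritten)) when the rule fires;
        // Ok(None) tells the pipeline to keep the node unchanged.
        let _ = plan;
        Ok(None)
    }

    fn name(&self) -> &str {
        "MyRule"
    }

    fn apply_order(&self) -> Option<ApplyOrder> {
        Some(ApplyOrder::TopDown)
    }
}
```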
40 | ### Extending With Statistics 41 | 42 | The optimizer intentionally remains heuristics-only, and the physical planner sticks to 43 | simple sequential scans. For coursework, students can still read `TableStatistics` from 44 | the catalog to prototype their own cardinality estimates or cost heuristics (e.g., to 45 | experiment with when to prefer an index scan), but no estimator ships in-tree. 46 | 47 | --- 48 | 49 | ## Interactions 50 | 51 | - **LogicalPlan** – the optimizer only sees logical nodes; physical/storage layers remain 52 | untouched. 53 | - **Catalog / Statistics** – current rules are heuristic, but `TableStatistics` remains 54 | available for students who want to prototype their own cost-based decisions. 55 | - **Execution** – leaner logical plans translate into simpler physical plans (e.g., 56 | predicate pushdown allows `PhysicalSeqScan` to discard rows earlier). 57 | 58 | --- 59 | 60 | ## Teaching Ideas 61 | 62 | - Implement a new rule (join reordering, constant folding) and use `RUST_LOG=trace` to 63 | compare plan dumps before/after. 64 | - Discuss pipeline ordering—swap rule order and observe different outcomes. 65 | - Prototype a tiny cost estimator using row counts from `TableStatistics` to decide on 66 | index scans vs sequential scans. 67 | 68 | --- 69 | 70 | Further reading: [Rule-Based Optimization](../optimizer/rules.md) 71 | -------------------------------------------------------------------------------- /src/plan/logical_planner/plan_drop.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{QuillSQLError, QuillSQLResult}; 2 | use crate::plan::logical_plan::{DropIndex, DropTable, LogicalPlan}; 3 | 4 | use super::LogicalPlanner; 5 | 6 | impl<'a> LogicalPlanner<'a> { 7 | pub fn plan_drop_table( 8 | &self, 9 | names: &[sqlparser::ast::ObjectName], 10 | if_exists: bool, 11 | cascade: bool, 12 | purge: bool, 13 | ) -> QuillSQLResult<LogicalPlan> { 14 | if purge { 15 | return Err(QuillSQLError::NotSupport( 16 | "DROP TABLE ... PURGE is not supported".to_string(), 17 | )); 18 | } 19 | if names.len() != 1 { 20 | return Err(QuillSQLError::NotSupport( 21 | "DROP TABLE only supports a single target".to_string(), 22 | )); 23 | } 24 | 25 | let table_ref = self.bind_table_name(&names[0])?; 26 | if cascade { 27 | // Implicitly drop dependent indexes, so CASCADE behaves the same as default. 28 | // No-op, but accepted for compatibility. 29 | } 30 | 31 | Ok(LogicalPlan::DropTable(DropTable { 32 | name: table_ref, 33 | if_exists, 34 | })) 35 | } 36 | 37 | pub fn plan_drop_index( 38 | &self, 39 | names: &[sqlparser::ast::ObjectName], 40 | if_exists: bool, 41 | cascade: bool, 42 | purge: bool, 43 | ) -> QuillSQLResult<LogicalPlan> { 44 | if cascade { 45 | return Err(QuillSQLError::NotSupport( 46 | "DROP INDEX ... CASCADE is not supported".to_string(), 47 | )); 48 | } 49 | if purge { 50 | return Err(QuillSQLError::NotSupport( 51 | "DROP INDEX ... 
PURGE is not supported".to_string(), 52 | )); 53 | } 54 | if names.len() != 1 { 55 | return Err(QuillSQLError::NotSupport( 56 | "DROP INDEX only supports a single target".to_string(), 57 | )); 58 | } 59 | 60 | let parts = &names[0].0; 61 | let (catalog, schema, name) = match parts.as_slice() { 62 | [ident] => (None, None, ident.value.clone()), 63 | [schema, ident] => (None, Some(schema.value.clone()), ident.value.clone()), 64 | [catalog, schema, ident] => ( 65 | Some(catalog.value.clone()), 66 | Some(schema.value.clone()), 67 | ident.value.clone(), 68 | ), 69 | _ => { 70 | return Err(QuillSQLError::Plan(format!( 71 | "DROP INDEX name '{}' has too many qualifiers", 72 | names[0] 73 | ))) 74 | } 75 | }; 76 | 77 | Ok(LogicalPlan::DropIndex(DropIndex { 78 | name, 79 | schema, 80 | catalog, 81 | if_exists, 82 | })) 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/plan/logical_planner/plan_create_table.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{QuillSQLError, QuillSQLResult}; 2 | use std::collections::HashSet; 3 | 4 | use crate::catalog::{Column, DataType}; 5 | use crate::expression::Expr; 6 | use crate::plan::logical_plan::{CreateTable, LogicalPlan}; 7 | use crate::utils::scalar::ScalarValue; 8 | 9 | use super::LogicalPlanner; 10 | 11 | impl<'a> LogicalPlanner<'a> { 12 | pub fn plan_create_table( 13 | &self, 14 | name: &sqlparser::ast::ObjectName, 15 | column_defs: &Vec<sqlparser::ast::ColumnDef>, 16 | if_not_exists: bool, 17 | ) -> QuillSQLResult<LogicalPlan> { 18 | let name = self.bind_table_name(name)?; 19 | let mut columns = vec![]; 20 | for col_def in column_defs { 21 | let data_type: DataType = (&col_def.data_type).try_into()?; 22 | let not_null: bool = col_def 23 | .options 24 | .iter() 25 | .any(|opt| matches!(opt.option, sqlparser::ast::ColumnOption::NotNull)); 26 | let default_expr: Option<&sqlparser::ast::Expr> = col_def 27 | .options 28 | .iter() 29 | .find(|opt| matches!(opt.option, sqlparser::ast::ColumnOption::Default(_))) 30 | .map(|opt| { 31 | if let sqlparser::ast::ColumnOption::Default(expr) = &opt.option { 32 | expr 33 | } else { 34 | unreachable!() 35 | } 36 | }); 37 | let default = if let Some(expr) = default_expr { 38 | let expr = self.bind_expr(expr)?; 39 | match expr { 40 | Expr::Literal(lit) => lit.value.cast_to(&data_type)?, 41 | _ => { 42 | return Err(QuillSQLError::Internal( 43 | "The expr is not literal".to_string(), 44 | )) 45 | } 46 | } 47 | } else { 48 | ScalarValue::new_empty(data_type) 49 | }; 50 | 51 | columns.push( 52 | Column::new(col_def.name.value.clone(), data_type, !not_null) 53 | .with_relation(Some(name.clone())) 54 | .with_default(default), 55 | ) 56 | } 57 | 58 | check_column_name_conflict(&columns)?; 59 | Ok(LogicalPlan::CreateTable(CreateTable { 60 | name, 61 | columns, 62 | if_not_exists, 63 | })) 64 | } 65 | } 66 | 67 | fn check_column_name_conflict(columns: &[Column]) -> QuillSQLResult<()> { 68 | let mut names = HashSet::new(); 69 | for col in columns { 70 | if names.contains(col.name.as_str()) { 71 | return Err(QuillSQLError::Plan(format!( 72 | "Column names have conflict on '{}'", 73 | col.name 74 | ))); 75 | } else { 76 | names.insert(col.name.as_str()); 77 | } 78 | } 79 | Ok(()) 80 | } 81 | -------------------------------------------------------------------------------- /docs/src/modules/index.md: -------------------------------------------------------------------------------- 1 | # Index Module 2 | 3 | Indexes live in `src/storage/index/`. 
QuillSQL currently ships a B+Tree (B-link variant) 4 | that is exposed to execution via `IndexHandle`. Indexes allow point lookups and range 5 | scans in O(log n), dramatically reducing the need for full table scans. 6 | 7 | --- 8 | 9 | ## Responsibilities 10 | 11 | - Maintain an ordered key → `RecordId` mapping per indexed table. 12 | - Support point probes, range scans, insert/update/delete maintenance. 13 | - Cooperate with MVCC: entries reference heap tuples while visibility checks remain in 14 | execution/transaction code. 15 | - Provide `IndexHandle::range_scan`, returning a `TupleStream` so physical operators don’t 16 | need to know tree internals. 17 | 18 | --- 19 | 20 | ## Directory Layout 21 | 22 | | Path | Purpose | Key Types | 23 | | ---- | ------- | --------- | 24 | | `btree_index.rs` | Core B+Tree, page formats, insert/delete logic. | `BPlusTreeIndex` | 25 | | `btree_iterator.rs` | Range-scan iterator with sibling traversal. | `TreeIndexIterator` | 26 | | `btree_codec.rs` | Page encode/decode utilities. | `BPlusTreeLeafPageCodec` | 27 | 28 | --- 29 | 30 | ## Key Concepts 31 | 32 | ### B-link Structure 33 | Each leaf stores a pointer to its right sibling. Iterators use this to keep scanning even 34 | if a concurrent split occurs, avoiding restarts from the root and enabling latch-free 35 | range scans. 36 | 37 | ### Latch Crabbing 38 | Insert/delete operations climb the tree with shared latches and upgrade only when 39 | necessary (e.g., right before splitting), reducing contention. 40 | 41 | ### Range Scan → TupleStream 42 | `IndexHandle::range_scan` wraps `TreeIndexIterator` and automatically fetches heap tuples, 43 | returning `(rid, meta, tuple)` triples. Execution remains storage-agnostic. 44 | 
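A hedged sketch of a consumer: the `(rid, meta, tuple)` item shape follows the description above, but the exact signatures of `range_scan` and the stream's `next` are assumptions for illustration.

```rust
// Illustrative: counting rows returned by an index range scan.
fn count_in_range(
    handle: &IndexHandle,
    low: &Tuple,
    high: &Tuple,
) -> QuillSQLResult<usize> {
    // bounds API assumed; yields (rid, meta, tuple) triples
    let mut stream = handle.range_scan(Some(low), Some(high))?;
    let mut count = 0;
    while let Some((_rid, _meta, _tuple)) = stream.next()? {
        count += 1; // tuples arrive already fetched from the heap
    }
    Ok(count)
}
```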
45 | ### Inline Maintenance 46 | Index inserts/updates/deletes modify the tree immediately and emit logical WAL for redo. 47 | There is no deferred “index vacuum”; once a heap tuple is deleted its index entry is 48 | removed in the same transaction. 49 | 50 | --- 51 | 52 | ## Interactions 53 | 54 | - **Catalog** – stores `Arc<BPlusTreeIndex>` instances alongside table metadata so 55 | execution can fetch handles directly. 56 | - **Execution** – `PhysicalIndexScan` uses `ExecutionContext::index_stream`; DML operators 57 | call `insert_tuple_with_indexes` so heap writes and index maintenance stay in sync. 58 | - **Transaction/MVCC** – heaps store transaction metadata; indexes just reference RIDs, so 59 | MVCC visibility is enforced when tuples are materialised. 60 | - **Recovery** – WAL contains `IndexInsert/IndexDelete` records to replay structural 61 | changes after crashes. 62 | 63 | --- 64 | 65 | ## Teaching Ideas 66 | 67 | - Build a covering index example to show how avoiding heap lookups improves latency. 68 | - Instrument `TreeIndexIterator` to visualise sibling traversal during range scans. 69 | - Compare SeqScan vs IndexScan on selective predicates to highlight indexing benefits. 70 | 71 | --- 72 | 73 | Further reading: [B+Tree internals](../index/btree_index.md) 74 | -------------------------------------------------------------------------------- /src/storage/codec/tuple.rs: -------------------------------------------------------------------------------- 1 | use crate::catalog::SchemaRef; 2 | use crate::error::{QuillSQLError, QuillSQLResult}; 3 | use crate::storage::codec::{DecodedData, ScalarValueCodec}; 4 | use crate::storage::tuple::Tuple; 5 | use crate::utils::bitmap::DynamicBitmap; 6 | use crate::utils::scalar::ScalarValue; 7 | 8 | pub struct TupleCodec; 9 | 10 | impl TupleCodec { 11 | pub fn encode(tuple: &Tuple) -> Vec<u8> { 12 | // null map 13 | let mut null_map = DynamicBitmap::new(); 14 | let mut attributes = Vec::new(); 15 | for (idx, value) in tuple.data.iter().enumerate() { 16 | null_map.set(idx, value.is_null()); 17 | if !value.is_null() { 18 | attributes.extend(ScalarValueCodec::encode(value)); 19 | } 20 | } 21 | 22 | let mut bytes = null_map.to_bytes(); 23 | bytes.extend(attributes); 24 | bytes 25 | }
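// Encoded layout: [null bitmap: ceil(column_count / 8) bytes] followed by the
// non-null values back-to-back; NULL columns occupy no value bytes. A 4-column
// tuple with one NULL therefore encodes as 1 bitmap byte plus 3 encoded values.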
26 | 27 | pub fn decode(bytes: &[u8], schema: SchemaRef) -> QuillSQLResult<DecodedData<Tuple>> { 28 | let mut total_offset = 0; 29 | 30 | let null_map_bytes = schema.column_count().div_ceil(8); 31 | let null_map = DynamicBitmap::from_bytes(&bytes[0..null_map_bytes]); 32 | total_offset += null_map_bytes; 33 | let mut bytes = &bytes[null_map_bytes..]; 34 | 35 | let mut data = vec![]; 36 | for (idx, col) in schema.columns.iter().enumerate() { 37 | let null = null_map.get(idx).ok_or(QuillSQLError::Internal( 38 | "null map size should be greater than or equal to col count".to_string(), 39 | ))?; 40 | if null { 41 | data.push(ScalarValue::new_empty(col.data_type)); 42 | } else { 43 | let (value, offset) = ScalarValueCodec::decode(bytes, col.data_type)?; 44 | data.push(value); 45 | total_offset += offset; 46 | bytes = &bytes[offset..]; 47 | } 48 | } 49 | 50 | Ok((Tuple::new(schema, data), total_offset)) 51 | } 52 | } 53 | 54 | #[cfg(test)] 55 | mod tests { 56 | use crate::catalog::{Column, DataType, Schema}; 57 | use crate::storage::codec::TupleCodec; 58 | use crate::storage::tuple::Tuple; 59 | use crate::utils::scalar::ScalarValue; 60 | use std::sync::Arc; 61 | 62 | #[test] 63 | fn tuple_codec() { 64 | let schema = Arc::new(Schema::new(vec![ 65 | Column::new("a", DataType::Boolean, true), 66 | Column::new("b", DataType::Int32, true), 67 | Column::new("c", DataType::UInt64, true), 68 | Column::new("d", DataType::Varchar(None), true), 69 | ])); 70 | let tuple = Tuple::new( 71 | schema.clone(), 72 | vec![ 73 | true.into(), 74 | ScalarValue::Int32(None), 75 | 1234u64.into(), 76 | "aabb".to_string().into(), 77 | ], 78 | ); 79 | let new_tuple = TupleCodec::decode(&TupleCodec::encode(&tuple), schema) 80 | .unwrap() 81 | .0; 82 | assert_eq!(new_tuple, tuple); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /public/terminal-preview.svg: -------------------------------------------------------------------------------- [SVG markup lost in extraction; only its text content survives: a mock terminal graphic titled "QUILLSQL TERMINAL" with GITHUB and PROFILE links, the banner "QUILLSQL / INTERACTIVE TTY · HELP FOR COMMANDS", the lines "Welcome to QUILLSQL Terminal. Type `help` to get started." and "Current SQL endpoint: /api/sql", a "quill@tty:~$" prompt, and the hint "type help for usage · Shift+Enter for newline".] -------------------------------------------------------------------------------- /src/storage/codec/meta_page.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use crate::storage::codec::{CommonCodec, DecodedData}; 3 | use crate::storage::page::MetaPage; 4 | 5 | pub struct MetaPageCodec; 6 | 7 | impl MetaPageCodec { 8 | pub fn encode(page: &MetaPage) -> Vec<u8> { 9 | let mut bytes = Vec::new(); 10 | bytes.extend(CommonCodec::encode_u32(page.major_version)); 11 | bytes.extend(CommonCodec::encode_u32(page.minor_version)); 12 | bytes.extend(CommonCodec::encode_u32(page.freelist_page_id)); 13 | bytes.extend(CommonCodec::encode_u32( 14 | page.information_schema_schemas_first_page_id, 15 | )); 16 | bytes.extend(CommonCodec::encode_u32( 17 | page.information_schema_tables_first_page_id, 18 | )); 19 | bytes.extend(CommonCodec::encode_u32( 20 | page.information_schema_columns_first_page_id, 21 | )); 22 | bytes.extend(CommonCodec::encode_u32( 23 | page.information_schema_indexes_first_page_id, 24 | )); 25 | bytes 26 | } 27 | 28 | pub fn decode(bytes: &[u8]) -> QuillSQLResult<DecodedData<MetaPage>> { 29 | let mut left_bytes = bytes; 30 | 31 | let (major_version, offset) = CommonCodec::decode_u32(left_bytes)?; 32 | left_bytes = &left_bytes[offset..]; 33 | let (minor_version, offset) = CommonCodec::decode_u32(left_bytes)?; 34 | left_bytes = &left_bytes[offset..]; 35 | let (freelist_page_id, offset) = CommonCodec::decode_u32(left_bytes)?; 36 | left_bytes = &left_bytes[offset..]; 37 | let (information_schema_schemas_first_page_id, offset) = 38 | CommonCodec::decode_u32(left_bytes)?; 39 | left_bytes = &left_bytes[offset..]; 40 | let (information_schema_tables_first_page_id, offset) = 41 | CommonCodec::decode_u32(left_bytes)?; 42 | left_bytes = &left_bytes[offset..]; 43 | let (information_schema_columns_first_page_id, offset) = 44 | CommonCodec::decode_u32(left_bytes)?; 45 | left_bytes = &left_bytes[offset..]; 46 | let (information_schema_indexes_first_page_id, offset) = 47 | CommonCodec::decode_u32(left_bytes)?; 48 | left_bytes = &left_bytes[offset..]; 49 | 50 | Ok(( 51 | MetaPage { 52 | major_version, 53 | minor_version, 54 | freelist_page_id, 55 | information_schema_schemas_first_page_id, 56 | information_schema_tables_first_page_id, 57 | information_schema_columns_first_page_id, 58 | information_schema_indexes_first_page_id, 59 | }, 60 | bytes.len() - left_bytes.len(), 61 | )) 62 | } 63 | } 64 | 65 | #[cfg(test)] 66 | mod tests { 67 | use crate::storage::codec::MetaPageCodec; 68 | use crate::storage::page::MetaPage; 69 | 70 | #[test] 71 | fn meta_page_codec() { 72 | let page = MetaPage::try_new().unwrap(); 73 | let (new_page, _) = MetaPageCodec::decode(&MetaPageCodec::encode(&page)).unwrap(); 74 | assert_eq!(page, new_page); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/utils/table_ref.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] 2 | pub enum TableReference { 3 | /// An unqualified table reference, e.g. "table" 4 | Bare { 5 | /// The table name 6 | table: String, 7 | }, 8 | /// A partially resolved table reference, e.g. 
"schema.table" 9 | Partial { 10 | /// The schema containing the table 11 | schema: String, 12 | /// The table name 13 | table: String, 14 | }, 15 | /// A fully resolved table reference, e.g. "catalog.schema.table" 16 | Full { 17 | /// The catalog (aka database) containing the table 18 | catalog: String, 19 | /// The schema containing the table 20 | schema: String, 21 | /// The table name 22 | table: String, 23 | }, 24 | } 25 | 26 | impl TableReference { 27 | pub fn table(&self) -> &str { 28 | match self { 29 | Self::Full { table, .. } | Self::Partial { table, .. } | Self::Bare { table } => table, 30 | } 31 | } 32 | 33 | pub fn schema(&self) -> Option<&str> { 34 | match self { 35 | Self::Full { schema, .. } | Self::Partial { schema, .. } => Some(schema), 36 | _ => None, 37 | } 38 | } 39 | 40 | pub fn catalog(&self) -> Option<&str> { 41 | match self { 42 | Self::Full { catalog, .. } => Some(catalog), 43 | _ => None, 44 | } 45 | } 46 | 47 | pub fn resolved_eq(&self, other: &Self) -> bool { 48 | match self { 49 | TableReference::Bare { table } => table == other.table(), 50 | TableReference::Partial { schema, table } => { 51 | table == other.table() && other.schema().map_or(true, |s| s == schema) 52 | } 53 | TableReference::Full { 54 | catalog, 55 | schema, 56 | table, 57 | } => { 58 | table == other.table() 59 | && other.schema().map_or(true, |s| s == schema) 60 | && other.catalog().map_or(true, |c| c == catalog) 61 | } 62 | } 63 | } 64 | 65 | pub fn to_log_string(&self) -> String { 66 | match self { 67 | TableReference::Bare { table } => table.clone(), 68 | TableReference::Partial { schema, table } => format!("{schema}.{table}"), 69 | TableReference::Full { 70 | catalog, 71 | schema, 72 | table, 73 | } => format!("{catalog}.{schema}.{table}"), 74 | } 75 | } 76 | } 77 | 78 | impl std::fmt::Display for TableReference { 79 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 80 | match self { 81 | TableReference::Bare { table } => write!(f, "{table}"), 82 | TableReference::Partial { schema, table } => { 83 | write!(f, "{schema}.{table}") 84 | } 85 | TableReference::Full { 86 | catalog, 87 | schema, 88 | table, 89 | } => write!(f, "{catalog}.{schema}.{table}"), 90 | } 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/storage/codec/freelist_page.rs: -------------------------------------------------------------------------------- 1 | use crate::buffer::PAGE_SIZE; 2 | use crate::error::QuillSQLResult; 3 | use crate::storage::codec::{CommonCodec, DecodedData}; 4 | use crate::storage::page::{FreelistPage, FreelistPageHeader}; 5 | 6 | pub struct FreelistPageHeaderCodec; 7 | 8 | impl FreelistPageHeaderCodec { 9 | pub fn encode(header: &FreelistPageHeader) -> Vec { 10 | let mut bytes = Vec::new(); 11 | bytes.extend(CommonCodec::encode_u32(header.next_page_id)); 12 | bytes.extend(CommonCodec::encode_u32(header.current_size)); 13 | bytes.extend(CommonCodec::encode_u32(header.max_size)); 14 | bytes 15 | } 16 | 17 | pub fn decode(bytes: &[u8]) -> QuillSQLResult> { 18 | let mut left_bytes = bytes; 19 | 20 | let (next_page_id, offset) = CommonCodec::decode_u32(left_bytes)?; 21 | left_bytes = &left_bytes[offset..]; 22 | 23 | let (current_size, offset) = CommonCodec::decode_u32(left_bytes)?; 24 | left_bytes = &left_bytes[offset..]; 25 | 26 | let (max_size, offset) = CommonCodec::decode_u32(left_bytes)?; 27 | left_bytes = &left_bytes[offset..]; 28 | 29 | Ok(( 30 | FreelistPageHeader { 31 | next_page_id, 32 | current_size, 33 | max_size, 34 | 
}, 35 | bytes.len() - left_bytes.len(), 36 | )) 37 | } 38 | } 39 | 40 | pub struct FreelistPageCodec; 41 | 42 | impl FreelistPageCodec { 43 | pub fn encode(page: &FreelistPage) -> Vec<u8> { 44 | let mut bytes = Vec::new(); 45 | bytes.extend(FreelistPageHeaderCodec::encode(&page.header)); 46 | for i in 0..page.header.current_size { 47 | bytes.extend(CommonCodec::encode_u32(page.array[i as usize])) 48 | } 49 | // pad so the encoded length is exactly PAGE_SIZE 50 | assert!(bytes.len() <= PAGE_SIZE); 51 | bytes.extend(vec![0; PAGE_SIZE - bytes.len()]); 52 | bytes 53 | } 54 | 55 | pub fn decode(bytes: &[u8]) -> QuillSQLResult<DecodedData<FreelistPage>> { 56 | let mut left_bytes = bytes; 57 | 58 | let (header, offset) = FreelistPageHeaderCodec::decode(left_bytes)?; 59 | left_bytes = &left_bytes[offset..]; 60 | 61 | let mut array = Vec::new(); 62 | for _ in 0..header.current_size { 63 | let (page_id, offset) = CommonCodec::decode_u32(left_bytes)?; 64 | left_bytes = &left_bytes[offset..]; 65 | array.push(page_id); 66 | } 67 | 68 | Ok((FreelistPage { header, array }, PAGE_SIZE)) 69 | } 70 | } 71 | 72 | #[cfg(test)] 73 | mod tests { 74 | use crate::storage::codec::FreelistPageCodec; 75 | use crate::storage::page::{FreelistPage, FreelistPageHeader, FREELIST_PAGE_MAX_SIZE}; 76 | 77 | #[test] 78 | fn freelist_page_codec() { 79 | let page = FreelistPage { 80 | header: FreelistPageHeader { 81 | next_page_id: 1, 82 | current_size: 3, 83 | max_size: *FREELIST_PAGE_MAX_SIZE as u32, 84 | }, 85 | array: vec![5, 6, 8], 86 | }; 87 | let (new_page, _) = FreelistPageCodec::decode(&FreelistPageCodec::encode(&page)).unwrap(); 88 | assert_eq!(page, new_page); 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /docs/src/modules/catalog.md: -------------------------------------------------------------------------------- 1 | # Catalog Module 2 | 3 | `src/catalog/` acts as QuillSQL’s data dictionary. It tracks schema/table/index metadata, 4 | statistics, and the mapping between logical names and physical storage objects such as 5 | `TableHeap` and `BPlusTreeIndex`. Every layer—planner, execution, background workers—uses 6 | the catalog to discover structure. 7 | 8 | --- 9 | 10 | ## Responsibilities 11 | 12 | - Persist definitions for schemas, tables, columns, indexes, and constraints. 13 | - Map logical `TableReference`s to physical handles (heap files, index roots, file ids). 14 | - Store table statistics (row counts, histograms) that drive ANALYZE and optimization. 15 | - Manage the DDL lifecycle: creation and deletion update the in-memory registry and the 16 | on-disk metadata pages. 17 | 18 | --- 19 | 20 | ## Directory Layout 21 | 22 | | Path | Description | Key Types | 23 | | ---- | ----------- | --------- | 24 | | `mod.rs` | Public API surface. | `Catalog`, `TableHandleRef` | 25 | | `schema.rs` | Schema objects and table references. | `Schema`, `Column`, `TableReference` | 26 | | `registry/` | Thread-safe registry for heaps (MVCC vacuum). | `TableRegistry` | 27 | | `statistics.rs` | ANALYZE output and helpers. | `TableStatistics` | 28 | | `loader.rs` | Boot-time metadata loader. | `load_catalog_data` | 29 | 30 | --- 31 | 32 | ## Core Concepts 33 | 34 | ### TableReference 35 | Unified identifier (database, schema, table). Logical planner, execution, and transaction 36 | code all use it when requesting handles from the catalog. 37 | 
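A quick illustration, grounded in the `TableReference` enum from `src/utils/table_ref.rs` (only the import path is an assumption here):

```rust
use quill_sql::utils::table_ref::TableReference; // path assumed for this doc

let bare = TableReference::Bare { table: "t1".into() };
let full = TableReference::Full {
    catalog: "quill".into(),
    schema: "public".into(),
    table: "t1".into(),
};
assert_eq!(full.to_string(), "quill.public.t1");
// A bare name matches any qualification of the same table:
assert!(bare.resolved_eq(&full));
```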
38 | ### Registries 39 | `TableRegistry` maps internal IDs to `Arc<TableHeap>` plus logical names. It is used by 40 | the MVCC vacuum worker to iterate user tables without poking directly into catalog data. 41 | 42 | ### Schema & Column 43 | `Schema` stores column definitions (type, default, nullability). Execution uses it when 44 | materialising tuples; the planner uses it to check expression types. `Schema::project` 45 | helps physical operators build projected outputs. 46 | 47 | ### TableStatistics 48 | `ANALYZE` writes row counts and histograms into the catalog. Optimizer rules and planner 49 | heuristics can consult these stats when deciding whether to push filters or pick indexes. 50 | Each column tracks null/non-null counts, min/max values, and a sample-based distinct 51 | estimate, enabling DuckDB-style selectivity heuristics (`1/distinct`, uniform ranges). 52 | 53 | --- 54 | 55 | ## Interactions 56 | 57 | - **SQL / Planner** – DDL planning calls `Catalog::create_table` / `create_index`; name 58 | binding relies on `Schema`. 59 | - **Execution** – `ExecutionContext::table_handle` and `index_handle` fetch physical 60 | handles through the catalog, so scans never hard-code heap locations. 61 | - **Background workers** – MVCC and index vacuum iterate the registries via `Arc` clones. 62 | - **Recovery** – `load_catalog_data` rebuilds the in-memory catalog from control files and 63 | metadata pages during startup. 64 | 65 | --- 66 | 67 | ## Teaching Ideas 68 | 69 | - Extend the schema system with hidden or computed columns and teach the catalog to store 70 | the extra metadata. 71 | - Add histogram bins to `TableStatistics` and demonstrate how a simple cost heuristic can 72 | choose better plans. 73 | - Turn on `RUST_LOG=catalog=debug` to observe how DDL mutates the registries. 74 | -------------------------------------------------------------------------------- /src/optimizer/rule/merge_limit.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use crate::optimizer::logical_optimizer::ApplyOrder; 3 | use crate::optimizer::LogicalOptimizerRule; 4 | use crate::plan::logical_plan::{Limit, LogicalPlan}; 5 | use std::cmp::min; 6 | use std::sync::Arc; 7 | 8 | pub struct MergeLimit; 9 | 10 | impl LogicalOptimizerRule for MergeLimit { 11 | fn try_optimize(&self, plan: &LogicalPlan) -> QuillSQLResult<Option<LogicalPlan>> { 12 | let LogicalPlan::Limit(parent) = plan else { 13 | return Ok(None); 14 | }; 15 | 16 | if let LogicalPlan::Limit(child) = &*parent.input { 17 | let new_limit = match (parent.limit, child.limit) { 18 | (Some(parent_limit), Some(child_limit)) => { 19 | Some(min(parent_limit, child_limit.saturating_sub(parent.offset))) 20 | } 21 | (Some(parent_limit), None) => Some(parent_limit), 22 | (None, Some(child_limit)) => Some(child_limit.saturating_sub(parent.offset)), 23 | (None, None) => None, 24 | };
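// Worked example (mirrors the test below): `LIMIT 10` over `LIMIT 1000`
// over `OFFSET 10` merges pairwise into `LIMIT 10 OFFSET 10`.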
53 | --- 54 | 55 | ## Interactions 56 | 57 | - **SQL / Planner** – DDL planning calls `Catalog::create_table` / `create_index`; name 58 | binding relies on `Schema`. 59 | - **Execution** – `ExecutionContext::table_handle` and `index_handle` fetch physical 60 | handles through the catalog, so scans never hard-code heap locations. 61 | - **Background workers** – MVCC and index vacuum iterate the registries via `Arc` clones. 62 | - **Recovery** – `load_catalog_data` rebuilds the in-memory catalog from control files and 63 | metadata pages during startup. 64 | 65 | --- 66 | 67 | ## Teaching Ideas 68 | 69 | - Extend the schema system with hidden or computed columns and teach the catalog to store 70 | the extra metadata. 71 | - Add histogram bins to `TableStatistics` and demonstrate how a simple cost heuristic can 72 | choose better plans. 73 | - Turn on `RUST_LOG=catalog=debug` to observe how DDL mutates the registries. 74 | -------------------------------------------------------------------------------- /src/optimizer/rule/merge_limit.rs: -------------------------------------------------------------------------------- 1 | use crate::error::QuillSQLResult; 2 | use crate::optimizer::logical_optimizer::ApplyOrder; 3 | use crate::optimizer::LogicalOptimizerRule; 4 | use crate::plan::logical_plan::{Limit, LogicalPlan}; 5 | use std::cmp::min; 6 | use std::sync::Arc; 7 | 8 | pub struct MergeLimit; 9 | 10 | impl LogicalOptimizerRule for MergeLimit { 11 | fn try_optimize(&self, plan: &LogicalPlan) -> QuillSQLResult<Option<LogicalPlan>> { 12 | let LogicalPlan::Limit(parent) = plan else { 13 | return Ok(None); 14 | }; 15 | 16 | if let LogicalPlan::Limit(child) = &*parent.input { 17 | let new_limit = match (parent.limit, child.limit) { 18 | (Some(parent_limit), Some(child_limit)) => { 19 | Some(min(parent_limit, child_limit.saturating_sub(parent.offset))) 20 | } 21 | (Some(parent_limit), None) => Some(parent_limit), 22 | (None, Some(child_limit)) => Some(child_limit.saturating_sub(parent.offset)), 23 | (None, None) => None, 24 | }; 25 | let plan = LogicalPlan::Limit(Limit { 26 | limit: new_limit, 27 | offset: child.offset + parent.offset, 28 | input: Arc::new((*child.input).clone()), 29 | }); 30 | self.try_optimize(&plan) 31 | .map(|opt_plan| opt_plan.or_else(|| Some(plan))) 32 | } else { 33 | Ok(None) 34 | } 35 | } 36 | 37 | fn name(&self) -> &str { 38 | "MergeLimit" 39 | } 40 | 41 | fn apply_order(&self) -> Option<ApplyOrder> { 42 | Some(ApplyOrder::TopDown) 43 | } 44 | } 45 | 46 | #[cfg(test)] 47 | mod tests { 48 | use crate::catalog::EMPTY_SCHEMA_REF; 49 | use crate::optimizer::rule::MergeLimit; 50 | use crate::optimizer::LogicalOptimizer; 51 | use crate::plan::logical_plan::{EmptyRelation, Limit, LogicalPlan}; 52 | use std::sync::Arc; 53 | 54 | fn build_optimizer() -> LogicalOptimizer { 55 | LogicalOptimizer::with_rules(vec![Arc::new(MergeLimit)]) 56 | } 57 | 58 | #[test] 59 | fn merge_limit() { 60 | let plan = LogicalPlan::Limit(Limit { 61 | limit: Some(10), 62 | offset: 0, 63 | input: Arc::new(LogicalPlan::Limit(Limit { 64 | limit: Some(1000), 65 | offset: 0, 66 | input: Arc::new(LogicalPlan::Limit(Limit { 67 | limit: None, 68 | offset: 10, 69 | input: Arc::new(LogicalPlan::EmptyRelation(EmptyRelation { 70 | produce_one_row: false, 71 | schema: EMPTY_SCHEMA_REF.clone(), 72 | })), 73 | })), 74 | })), 75 | }); 76 | let optimized_plan = build_optimizer().optimize(&plan).unwrap(); 77 | 78 | if let LogicalPlan::Limit(Limit { 79 | limit, 80 | offset, 81 | input, 82 | }) = optimized_plan 83 | { 84 | assert_eq!(limit, Some(10)); 85 | assert_eq!(offset, 10); 86 | assert!(matches!(input.as_ref(), LogicalPlan::EmptyRelation(_))); 87 | } else { 88 | panic!("the first node should be limit"); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/plan/logical_planner/plan_query.rs: -------------------------------------------------------------------------------- 1 | use crate::error::{QuillSQLError, QuillSQLResult}; 2 | use crate::expression::Expr; 3 | use crate::utils::scalar::ScalarValue; 4 | use std::sync::Arc; 5 | 6 | use crate::plan::logical_plan::{Limit, LogicalPlan, Sort}; 7 | 8 | use super::LogicalPlanner; 9 | 10 | impl<'a> LogicalPlanner<'a> { 11 | pub fn plan_query(&self, query: &sqlparser::ast::Query) -> QuillSQLResult<LogicalPlan> { 12 | let plan = self.plan_set_expr(&query.body)?; 13 | let plan = self.plan_order_by(plan, &query.order_by)?; 14 | self.plan_limit(plan, &query.limit, &query.offset) 15 | } 16 | 17 | pub fn plan_order_by( 18 | &self, 19 | input: LogicalPlan, 20 | order_by: &Vec<sqlparser::ast::OrderByExpr>, 21 | ) -> QuillSQLResult<LogicalPlan> { 22 | if order_by.is_empty() { 23 | return Ok(input); 24 | } 25 | 26 | let mut order_by_exprs = vec![]; 27 | for order in order_by { 28 | order_by_exprs.push(self.bind_order_by_expr(order)?); 29 | } 30 | 31 | Ok(LogicalPlan::Sort(Sort { 32 | order_by: order_by_exprs, 33 | input: Arc::new(input), 34 | limit: None, 35 | })) 36 | } 37 | 38 | pub fn plan_limit( 39 | &self, 40 | input: LogicalPlan, 41 | limit: &Option<sqlparser::ast::Expr>, 42 | offset: &Option<sqlparser::ast::Offset>, 43 | ) -> QuillSQLResult<LogicalPlan> { 44 | if limit.is_none() && offset.is_none() { 45 | return Ok(input); 46 | } 47 | 48 | let limit = match limit { 49 | None => None, 50 | Some(limit_expr) => { 51 | let n = match self.bind_expr(limit_expr)? { 52 | Expr::Literal(lit) => match lit.value { 53 | ScalarValue::Int64(Some(v)) if v >= 0 => Ok(v as usize), 54 | _ => Err(QuillSQLError::Plan(format!( 55 | "LIMIT must be a non-negative integer, {}", 56 | lit.value 57 | ))), 58 | }, 59 | _ => Err(QuillSQLError::Plan(format!( 60 | "LIMIT must be literal, {}", 61 | limit_expr 62 | ))), 63 | }?; 64 | Some(n) 65 | } 66 | }; 67 | 68 | let offset = match offset { 69 | None => 0, 70 | Some(offset_expr) => match self.bind_expr(&offset_expr.value)?
{ 71 | Expr::Literal(lit) => match lit.value { 72 | ScalarValue::Int64(Some(v)) => { 73 | if v < 0 { 74 | return Err(QuillSQLError::Plan(format!("Offset must be >= 0, {}", v))); 75 | } 76 | Ok(v as usize) 77 | } 78 | _ => Err(QuillSQLError::Plan(format!( 79 | "Offset value not int64, {}", 80 | lit.value 81 | ))), 82 | }, 83 | _ => Err(QuillSQLError::Plan(format!( 84 | "OFFSET must be a literal expression, {}", 85 | offset_expr 86 | ))), 87 | }?, 88 | }; 89 | 90 | Ok(LogicalPlan::Limit(Limit { 91 | limit, 92 | offset, 93 | input: Arc::new(input), 94 | })) 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /docs/src/contributing.md: -------------------------------------------------------------------------------- 1 | # Contributor's Guide 2 | 3 | Welcome, and thank you for your interest in contributing to QuillSQL! Whether you're fixing a bug, adding a new feature, or improving the documentation, this guide will help you get started. 4 | 5 | ## 1. Getting Started: Your Development Environment 6 | 7 | ### Prerequisites 8 | 9 | - **Rust**: QuillSQL is written in Rust. If you don't have it yet, install it via [rustup](https://rustup.rs/). This will provide you with `rustc` (the compiler) and `cargo` (the package manager and build tool). 10 | ```bash 11 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh 12 | ``` 13 | - **Build Essentials**: Ensure you have a C/C++ toolchain such as `gcc` or `clang` installed; some Rust crates compile native code and need one to build. 14 | 15 | ### Setup 16 | 17 | 1. **Fork the Repository**: Start by forking the main QuillSQL repository to your own GitHub account. 18 | 19 | 2. **Clone Your Fork**: Clone your forked repository to your local machine, replacing `<your-username>` with your GitHub handle. 20 | ```bash 21 | git clone https://github.com/<your-username>/QuillSQL.git 22 | cd QuillSQL 23 | ``` 24 | 25 | 3. **Build the Project**: Compile the entire project to ensure everything is set up correctly. 26 | ```bash 27 | cargo build 28 | ``` 29 | 30 | ## 2. Development Workflow 31 | 32 | ### Running Tests 33 | 34 | Before and after making any changes, it's crucial to run the test suite to ensure you haven't broken anything. 35 | 36 | - **Run all unit and integration tests**: 37 | ```bash 38 | cargo test 39 | ``` 40 | 41 | - **Run the benchmark suite**: 42 | ```bash 43 | cargo bench 44 | ``` 45 | 46 | ### Code Style and Quality 47 | 48 | We adhere to the standard Rust coding style and use tools to enforce it. 49 | 50 | - **Formatting**: Before committing, please format your code with `rustfmt`. 51 | ```bash 52 | cargo fmt --all 53 | ``` 54 | 55 | - **Linting**: We use `clippy` to catch common mistakes and improve code quality. Please ensure `clippy` passes without warnings. 56 | ```bash 57 | cargo clippy --all-targets -- -D warnings 58 | ``` 59 | 60 | ### Submitting Your Contribution 61 | 62 | 1. **Create a New Branch**: Create a branch with a descriptive name for your feature or bugfix. 63 | ```bash 64 | git checkout -b my-awesome-feature 65 | ``` 66 | 67 | 2. **Make Your Changes**: Write your code. Add new tests to cover your changes. Ensure all existing tests still pass. 68 | 69 | 3. **Format and Lint**: Run `cargo fmt` and `cargo clippy` as described above. 70 | 71 | 4. **Commit Your Work**: Write a clear and concise commit message. 72 | ```bash 73 | git add . 74 | git commit -m "feat: Add support for window functions" 75 | ``` 76 | 77 | 5. **Push to Your Fork**: Push your branch to your fork on GitHub. 78 | ```bash 79 | git push -u origin my-awesome-feature 80 | ``` 81 | 82 | 6.
**Open a Pull Request**: Go to the original QuillSQL repository on GitHub. You should see a prompt to open a Pull Request from your new branch. Fill out the PR template with a description of your changes. 83 | 84 | ## 3. Working on the Documentation 85 | 86 | The documentation is built using `mdbook`. To preview your changes locally, you'll need to install it and the `mermaid` plugin. 87 | 88 | 1. **Install `mdbook` and `mdbook-mermaid`**: 89 | ```bash 90 | cargo install mdbook 91 | cargo install mdbook-mermaid 92 | ``` 93 | 94 | 2. **Serve the Book Locally**: Run the following command from the root of the project. 95 | ```bash 96 | mdbook serve docs 97 | ``` 98 | This will build the book and start a local web server. You can open your browser to `http://localhost:3000` to see the live-previewed documentation. 99 | -------------------------------------------------------------------------------- /src/execution/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod physical_plan; 2 | use crate::catalog::SchemaRef; 3 | use crate::error::{QuillSQLError, QuillSQLResult}; 4 | use crate::execution::physical_plan::PhysicalPlan; 5 | use crate::expression::{Expr, ExprTrait}; 6 | use crate::storage::{ 7 | engine::{StorageEngine, TableBinding}, 8 | table_heap::TableHeap, 9 | tuple::Tuple, 10 | }; 11 | use crate::transaction::{Transaction, TransactionManager, TxnContext}; 12 | use crate::utils::scalar::ScalarValue; 13 | use crate::{catalog::Catalog, utils::table_ref::TableReference}; 14 | use std::sync::Arc; 15 | 16 | pub trait VolcanoExecutor { 17 | fn init(&self, _context: &mut ExecutionContext) -> QuillSQLResult<()> { 18 | Ok(()) 19 | } 20 | 21 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>>; 22 | 23 | fn output_schema(&self) -> SchemaRef; 24 | } 25 | 26 | /// Shared state threaded through every physical operator during execution. 27 | /// Exposes MVCC helpers, storage access, expression evaluation and DDL utilities. 28 | pub struct ExecutionContext<'a> { 29 | /// Mutable reference to the global catalog (schema + metadata). 30 | pub catalog: &'a mut Catalog, 31 | /// Pluggable storage engine used for heap/index access. 32 | storage: Arc<dyn StorageEngine>, 33 | /// Transaction runtime wrapper (snapshot, locks, undo tracking). 34 | txn: TxnContext<'a>, 35 | } 36 | 37 | impl<'a> ExecutionContext<'a> { 38 | pub fn new( 39 | catalog: &'a mut Catalog, 40 | txn: &'a mut Transaction, 41 | txn_mgr: Arc<TransactionManager>, 42 | storage: Arc<dyn StorageEngine>, 43 | ) -> Self { 44 | Self { 45 | catalog, 46 | storage, 47 | txn: TxnContext::new(txn_mgr, txn), 48 | } 49 | } 50 | 51 | /// Evaluate an expression expected to produce a boolean result. 52 | pub fn eval_predicate(&self, expr: &Expr, tuple: &Tuple) -> QuillSQLResult<bool> { 53 | match expr.evaluate(tuple)? { 54 | ScalarValue::Boolean(Some(v)) => Ok(v), 55 | ScalarValue::Boolean(None) => Ok(false), 56 | other => Err(QuillSQLError::Execution(format!( 57 | "predicate value must be boolean, got {}", 58 | other 59 | ))), 60 | } 61 | } 62 | 63 | /// Evaluate an arbitrary scalar expression. 64 | pub fn eval_expr(&self, expr: &Expr, tuple: &Tuple) -> QuillSQLResult<ScalarValue> { 65 | expr.evaluate(tuple) 66 | } 67 | 68 | /// Look up the table heap through the storage engine.
69 | pub fn table(&self, table: &TableReference) -> QuillSQLResult<TableBinding> { 70 | self.storage.table(self.catalog, table) 71 | } 72 | 73 | pub fn table_heap(&self, table: &TableReference) -> QuillSQLResult<Arc<TableHeap>> { 74 | Ok(self.table(table)?.table_heap()) 75 | } 76 | 77 | pub fn txn_ctx(&self) -> &TxnContext<'a> { 78 | &self.txn 79 | } 80 | 81 | pub fn txn_ctx_mut(&mut self) -> &mut TxnContext<'a> { 82 | &mut self.txn 83 | } 84 | } 85 | 86 | pub struct ExecutionEngine<'a> { 87 | pub context: ExecutionContext<'a>, 88 | } 89 | impl<'a> ExecutionEngine<'a> { 90 | pub fn execute(&mut self, plan: Arc<PhysicalPlan>) -> QuillSQLResult<Vec<Tuple>> { 91 | plan.init(&mut self.context)?; 92 | let mut result = Vec::new(); 93 | loop { 94 | let next_tuple = plan.next(&mut self.context)?; 95 | if let Some(tuple) = next_tuple { 96 | result.push(tuple); 97 | } else { 98 | break; 99 | } 100 | } 101 | Ok(result) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/execution/physical_plan/nested_loop_join.rs: -------------------------------------------------------------------------------- 1 | //! Teaching-first nested loop join with optional predicate evaluation. 2 | 3 | use log::debug; 4 | use std::cell::RefCell; 5 | use std::sync::Arc; 6 | 7 | use crate::catalog::SchemaRef; 8 | use crate::expression::Expr; 9 | use crate::{ 10 | error::QuillSQLResult, 11 | execution::{ExecutionContext, VolcanoExecutor}, 12 | plan::logical_plan::JoinType, 13 | storage::tuple::Tuple, 14 | }; 15 | 16 | use super::PhysicalPlan; 17 | 18 | #[derive(Debug)] 19 | pub struct PhysicalNestedLoopJoin { 20 | pub join_type: JoinType, 21 | pub condition: Option<Expr>, 22 | pub left_input: Arc<PhysicalPlan>, 23 | pub right_input: Arc<PhysicalPlan>, 24 | pub schema: SchemaRef, 25 | 26 | left_tuple: RefCell<Option<Tuple>>, 27 | } 28 | impl PhysicalNestedLoopJoin { 29 | pub fn new( 30 | join_type: JoinType, 31 | condition: Option<Expr>, 32 | left_input: Arc<PhysicalPlan>, 33 | right_input: Arc<PhysicalPlan>, 34 | schema: SchemaRef, 35 | ) -> Self { 36 | PhysicalNestedLoopJoin { 37 | join_type, 38 | condition, 39 | left_input, 40 | right_input, 41 | schema, 42 | left_tuple: RefCell::new(None), 43 | } 44 | } 45 | } 46 | impl VolcanoExecutor for PhysicalNestedLoopJoin { 47 | fn init(&self, context: &mut ExecutionContext) -> QuillSQLResult<()> { 48 | debug!("init nested loop join executor"); 49 | self.left_input.init(context)?; 50 | self.right_input.init(context)?; 51 | self.left_tuple.borrow_mut().take(); 52 | Ok(()) 53 | } 54 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 55 | let mut left_next_tuple = if self.left_tuple.borrow().is_none() { 56 | self.left_input.next(context)? 57 | } else { 58 | self.left_tuple.borrow().clone() 59 | }; 60 | 61 | while left_next_tuple.is_some() { 62 | let left_tuple = left_next_tuple.clone().unwrap(); 63 | 64 | let mut right_next_tuple = self.right_input.next(context)?; 65 | while right_next_tuple.is_some() { 66 | let right_tuple = right_next_tuple.unwrap(); 67 | 68 | // TODO: honour join_type (only inner-join semantics are implemented) 69 | if let Some(condition) = &self.condition { 70 | let merged_tuple = 71 | Tuple::try_merge(vec![left_tuple.clone(), right_tuple.clone()])?; 72 | if context.eval_predicate(condition, &merged_tuple)?
{ 73 | self.left_tuple.borrow_mut().replace(left_tuple.clone()); 74 | return Ok(Some(Tuple::try_merge(vec![left_tuple, right_tuple])?)); 75 | } 76 | } else { 77 | // remember the current left tuple before returning 78 | self.left_tuple.borrow_mut().replace(left_tuple.clone()); 79 | 80 | return Ok(Some(Tuple::try_merge(vec![left_tuple, right_tuple])?)); 81 | } 82 | 83 | right_next_tuple = self.right_input.next(context)?; 84 | } 85 | 86 | // reset right executor 87 | self.right_input.init(context)?; 88 | left_next_tuple = self.left_input.next(context)?; 89 | } 90 | Ok(None) 91 | } 92 | 93 | fn output_schema(&self) -> SchemaRef { 94 | self.schema.clone() 95 | } 96 | } 97 | 98 | impl std::fmt::Display for PhysicalNestedLoopJoin { 99 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 100 | write!(f, "NestedLoopJoin") 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /src/tests/sql_test.rs: -------------------------------------------------------------------------------- 1 | use crate::database::Database; 2 | use crate::error::QuillSQLError; 3 | use crate::session::SessionContext; 4 | use crate::storage::tuple::Tuple; 5 | use regex::Regex; 6 | use sqllogictest::{DBOutput, DefaultColumnType}; 7 | use std::path::{Path, PathBuf}; 8 | 9 | pub struct QuillSQLDB { 10 | db: Database, 11 | session: SessionContext, 12 | } 13 | 14 | impl Default for QuillSQLDB { 15 | fn default() -> Self { 16 | Self::new() 17 | } 18 | } 19 | 20 | impl QuillSQLDB { 21 | pub fn new() -> Self { 22 | let db = Database::new_temp().unwrap(); 23 | let session = SessionContext::new(db.default_isolation()); 24 | Self { db, session } 25 | } 26 | } 27 | 28 | fn tuples_to_sqllogictest_string(tuples: Vec<Tuple>) -> Vec<Vec<String>> { 29 | let mut output = vec![]; 30 | for tuple in tuples.iter() { 31 | let mut row = vec![]; 32 | for value in tuple.data.iter() { 33 | row.push(format!("{value}")); 34 | } 35 | output.push(row); 36 | } 37 | output 38 | } 39 | 40 | impl sqllogictest::DB for QuillSQLDB { 41 | type Error = QuillSQLError; 42 | type ColumnType = DefaultColumnType; 43 | 44 | fn run(&mut self, sql: &str) -> Result<DBOutput<Self::ColumnType>, Self::Error> { 45 | let is_query_sql = { 46 | let lower_sql = sql.trim_start().to_ascii_lowercase(); 47 | lower_sql.starts_with("select") || lower_sql.starts_with("explain") 48 | }; 49 | let tuples = self.db.run_with_session(&mut self.session, sql)?; 50 | if tuples.is_empty() { 51 | if is_query_sql { 52 | return Ok(DBOutput::Rows { 53 | types: vec![], 54 | rows: vec![], 55 | }); 56 | } else { 57 | return Ok(DBOutput::StatementComplete(0)); 58 | } 59 | } 60 | let types = vec![DefaultColumnType::Any; tuples[0].schema.column_count()]; 61 | let rows = tuples_to_sqllogictest_string(tuples); 62 | Ok(DBOutput::Rows { types, rows }) 63 | } 64 | } 65 | 66 | #[test] 67 | fn sqllogictest() { 68 | let test_files = read_dir_recursive("src/tests/sql_example/"); 69 | println!("test_files: {:?}", test_files); 70 | 71 | for file in test_files { 72 | let db = QuillSQLDB::new(); 73 | let mut tester = sqllogictest::Runner::new(db); 74 | println!( 75 | "======== start to run file {} ========", 76 | file.to_str().unwrap() 77 | ); 78 | tester.run_file(file).unwrap(); 79 | } 80 | } 81 | 82 | #[allow(dead_code)] 83 | fn read_dir_recursive<P: AsRef<Path>>(path: P) -> Vec<PathBuf> { 84 | let mut dst = vec![]; 85 | read_dir_recursive_impl(&mut dst, path.as_ref()); 86 | dst 87 | } 88 | 89 | fn read_dir_recursive_impl(dst: &mut Vec<PathBuf>, path: &Path) { 90 | let push_file = |dst: &mut Vec<PathBuf>, path: PathBuf| { 91 | // skip _xxx.slt
file 92 | if Regex::new(r"/_.*\.slt") 93 | .unwrap() 94 | .is_match(path.to_str().unwrap()) 95 | { 96 | println!("skip file: {:?}", path); 97 | } else { 98 | dst.push(path); 99 | } 100 | }; 101 | 102 | if path.is_dir() { 103 | let entries = std::fs::read_dir(path).unwrap(); 104 | for entry in entries { 105 | let path = entry.unwrap().path(); 106 | 107 | if path.is_dir() { 108 | read_dir_recursive_impl(dst, &path); 109 | } else { 110 | push_file(dst, path); 111 | } 112 | } 113 | } else { 114 | push_file(dst, path.to_path_buf()); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/execution/physical_plan/seq_scan.rs: -------------------------------------------------------------------------------- 1 | //! Table sequential scan operator (full-table read with MVCC filtering). 2 | 3 | use std::cell::RefCell; 4 | use std::sync::OnceLock; 5 | 6 | use super::scan::ScanPrefetch; 7 | use crate::catalog::SchemaRef; 8 | use crate::execution::physical_plan::{resolve_table_binding, stream_not_ready}; 9 | use crate::storage::{ 10 | engine::{TableBinding, TupleStream}, 11 | page::{RecordId, TupleMeta}, 12 | }; 13 | use crate::transaction::LockMode; 14 | use crate::utils::table_ref::TableReference; 15 | use crate::{ 16 | error::QuillSQLResult, 17 | execution::{ExecutionContext, VolcanoExecutor}, 18 | storage::tuple::Tuple, 19 | }; 20 | 21 | const PREFETCH_BATCH: usize = 64; 22 | 23 | pub struct PhysicalSeqScan { 24 | pub table: TableReference, 25 | pub table_schema: SchemaRef, 26 | 27 | iterator: RefCell<Option<Box<dyn TupleStream>>>, 28 | prefetch: ScanPrefetch, 29 | table_binding: OnceLock<TableBinding>, 30 | } 31 | 32 | impl PhysicalSeqScan { 33 | pub fn new(table: TableReference, table_schema: SchemaRef) -> Self { 34 | PhysicalSeqScan { 35 | table, 36 | table_schema, 37 | iterator: RefCell::new(None), 38 | prefetch: ScanPrefetch::new(PREFETCH_BATCH), 39 | table_binding: OnceLock::new(), 40 | } 41 | } 42 | 43 | fn consume_row( 44 | &self, 45 | context: &mut ExecutionContext, 46 | rid: RecordId, 47 | meta: TupleMeta, 48 | tuple: Tuple, 49 | ) -> QuillSQLResult<Option<Tuple>> { 50 | context 51 | .txn_ctx_mut() 52 | .read_visible_tuple(&self.table, rid, &meta, tuple) 53 | } 54 | } 55 | 56 | impl VolcanoExecutor for PhysicalSeqScan { 57 | fn init(&self, context: &mut ExecutionContext) -> QuillSQLResult<()> { 58 | context 59 | .txn_ctx_mut() 60 | .lock_table(self.table.clone(), LockMode::IntentionShared)?; 61 | let binding = resolve_table_binding(&self.table_binding, context, &self.table)?; 62 | let stream = binding.scan()?; 63 | self.iterator.replace(Some(stream)); 64 | self.prefetch.clear(); 65 | Ok(()) 66 | } 67 | 68 | fn next(&self, context: &mut ExecutionContext) -> QuillSQLResult<Option<Tuple>> { 69 | loop { 70 | if let Some((rid, meta, tuple)) = self.prefetch.pop_front() { 71 | if let Some(result) = self.consume_row(context, rid, meta, tuple)? { 72 | return Ok(Some(result)); 73 | } 74 | continue; 75 | } 76 | 77 | if !self.prefetch.refill(|limit, out| { 78 | let mut guard = self.iterator.borrow_mut(); 79 | let stream = guard.as_mut().ok_or_else(|| stream_not_ready("SeqScan"))?; 80 | for _ in 0..limit { 81 | match stream.next()? { 82 | Some(entry) => out.push_back(entry), 83 | None => break, 84 | } 85 | } 86 | Ok(()) 87 | })?
{ 88 | return Ok(None); 89 | } 90 | } 91 | } 92 | 93 | fn output_schema(&self) -> SchemaRef { 94 | self.table_schema.clone() 95 | } 96 | } 97 | 98 | impl std::fmt::Display for PhysicalSeqScan { 99 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 100 | write!(f, "SeqScan") 101 | } 102 | } 103 | 104 | impl std::fmt::Debug for PhysicalSeqScan { 105 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 106 | f.debug_struct("PhysicalSeqScan") 107 | .field("table", &self.table) 108 | .field("table_schema", &self.table_schema) 109 | .finish() 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /docs/src/modules/storage.md: -------------------------------------------------------------------------------- 1 | # Storage Engine 2 | 3 | The storage engine persists relational data, covering heap files, indexes, page formats, 4 | and the handles exposed to execution. Understanding this layer is key to reasoning about 5 | performance, MVCC, and recovery. 6 | 7 | --- 8 | 9 | ## Responsibilities 10 | 11 | - Manage `TableHeap` insert/delete/update paths and their MVCC metadata. 12 | - Maintain indexes (see the [Index module](./index.md) for details). 13 | - Expose the `StorageEngine` trait so execution can fetch `TableHandle` / `IndexHandle` 14 | instances per table. 15 | - Provide `TupleStream` so sequential and index scans share a unified interface. 16 | 17 | --- 18 | 19 | ## Directory Layout 20 | 21 | | Path | Purpose | Key Types | 22 | | ---- | ------- | --------- | 23 | | `engine.rs` | Default engine plus handle definitions. | `StorageEngine`, `TableHandle`, `TupleStream` | 24 | | `table_heap/` | Heap storage + MVCC logic. | `TableHeap`, `MvccHeap` | 25 | | `index/` | B+Tree implementation. | `BPlusTreeIndex` | 26 | | `page/` | Page, RID, tuple metadata. | `Page`, `RecordId`, `TupleMeta` | 27 | | `tuple/` | Row encoding and projection helpers. | `Tuple` | 28 | | `disk_manager.rs` | File layout and page I/O. | `DiskManager` | 29 | | `disk_scheduler.rs` | `io_uring`-backed async scheduler. | `DiskScheduler` | 30 | 31 | --- 32 | 33 | ## Core Abstractions 34 | 35 | ### StorageEngine Trait 36 | ```rust 37 | pub trait StorageEngine { 38 | fn table(&self, catalog: &Catalog, table: &TableReference) 39 | -> QuillSQLResult<Arc<dyn TableHandle>>; 40 | fn indexes(&self, catalog: &Catalog, table: &TableReference) 41 | -> QuillSQLResult<Vec<Arc<dyn IndexHandle>>>; 42 | } 43 | ``` 44 | The default implementation wraps the row-oriented heap + B+Tree combo, but the trait is 45 | ready for column stores, remote storage, or async engines. 46 | 47 | ### TableHandle 48 | Offers `full_scan()`, `insert`, `delete`, `update`, and 49 | `prepare_row_for_write`. MVCC, undo, and locking concerns live here so execution operators 50 | only describe intent. Every delete/update now receives the table’s index handles so 51 | `HeapTableHandle` can delete or re-insert keys in tandem with heap tuples—exactly the 52 | behaviour CMU 15-445’s buffer/heap projects walk you through. 53 | 54 | ### TupleStream 55 | Minimal iterator that returns `(RecordId, TupleMeta, Tuple)` triples. Index scans use 56 | `IndexScanRequest` to describe ranges. 57 |
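A compact sketch of how these two abstractions can fit together. The trait bodies and helper types below are illustrative assumptions (the real definitions live in `engine.rs`); the point is that an operator drains a `TupleStream` the same way whatever handle produced it.

```rust
// Simplified stand-ins so the sketch is self-contained.
pub struct RecordId { pub page_id: u32, pub slot: u16 }
pub struct TupleMeta { pub deleted: bool }
pub struct Tuple { pub data: Vec<u8> }
pub type QuillSQLResult<T> = Result<T, String>;

/// Assumed shape of a scan stream: pull one (rid, meta, tuple) at a time.
pub trait TupleStream {
    fn next(&mut self) -> QuillSQLResult<Option<(RecordId, TupleMeta, Tuple)>>;
}

/// Assumed shape of a table handle: intent-level mutations plus scans.
pub trait TableHandle {
    fn full_scan(&self) -> QuillSQLResult<Box<dyn TupleStream>>;
    fn insert(&self, tuple: Tuple) -> QuillSQLResult<RecordId>;
    fn delete(&self, rid: RecordId) -> QuillSQLResult<()>;
}

/// Any operator can drain a stream the same way, regardless of the source.
pub fn count_live(handle: &dyn TableHandle) -> QuillSQLResult<usize> {
    let mut stream = handle.full_scan()?;
    let mut live = 0;
    while let Some((_rid, meta, _tuple)) = stream.next()? {
        if !meta.deleted {
            live += 1;
        }
    }
    Ok(live)
}
```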
58 | --- 59 | 60 | ## Interactions 61 | 62 | - **Execution** – `ExecutionContext::table_stream` / `index_stream` delegate to handles. 63 | - **Transaction** – Handle methods call into `TxnContext` to acquire locks, record undo, 64 | and emit WAL. 65 | - **Buffer Manager** – `TableHeap`/`BPlusTreeIndex` access pages through the shared buffer 66 | pool. 67 | - **Recovery** – Heap/index mutations generate WAL records (`HeapInsert`, `HeapDelete`, 68 | `IndexInsert`, …) that ARIES replays. 69 | - **Background** – MVCC vacuum and index cleanup obtain handles and iterate tuples via 70 | the same abstractions as foreground scans. 71 | 72 | --- 73 | 74 | ## Teaching Ideas 75 | 76 | - Implement a toy columnar handle to show how the execution engine can stay agnostic to 77 | storage layout. 78 | - Extend the `TableHandle::full_scan` / `TableIterator` plumbing to accept projection hints 79 | so students can experiment with column pruning. 80 | - Enable `RUST_LOG=storage::table_heap=trace` and trace MVCC version chains as updates 81 | occur. 82 | - Follow the CMU 15-445 Lab 2 flow: instrument `TableBinding::delete` to print every RID 83 | + key pair, run an UPDATE with multiple indexes, and confirm the WAL stream contains the 84 | matching HeapInsert/HeapDelete + IndexLeafInsert/IndexLeafDelete entries. 85 | 86 | --- 87 | 88 | Further reading: [Disk I/O](../storage/disk_io.md), 89 | [Page & Tuple Layout](../storage/page_layouts.md), 90 | [Table Heap & MVCC](../storage/table_heap.md) 91 | -------------------------------------------------------------------------------- /src/utils/cache/tiny_lfu.rs: -------------------------------------------------------------------------------- 1 | use std::collections::hash_map::DefaultHasher; 2 | use std::hash::{Hash, Hasher}; 3 | 4 | /// TinyLFU-style admission filter: approximate frequency via 4-bit counters in a simple 5 | /// Count-Min Sketch. This is a minimal, lockless (external locking) and CPU-cheap version 6 | /// intended to bias admission decisions before the main replacer. 7 | #[derive(Debug)] 8 | pub struct TinyLFU { 9 | width: usize, 10 | depth: usize, 11 | tables: Vec<Vec<u8>>, // 4-bit per counter packed into u8 (2 counters per byte) 12 | } 13 | 14 | impl TinyLFU { 15 | pub fn new(width: usize, depth: usize) -> Self { 16 | let width = width.next_power_of_two(); 17 | let depth = depth.max(1).min(4); 18 | let tables = (0..depth).map(|_| vec![0u8; (width + 1) / 2]).collect(); 19 | Self { 20 | width, 21 | depth, 22 | tables, 23 | } 24 | } 25 | 26 | #[inline] 27 | fn hash_i(&self, key: u64, i: usize) -> usize { 28 | let mut h = DefaultHasher::new(); 29 | (key.wrapping_add((i as u64) << 32)).hash(&mut h); 30 | (h.finish() as usize) & (self.width - 1) 31 | } 32 | 33 | #[inline] 34 | fn load_counter(slot: &mut [u8], idx: usize) -> u8 { 35 | let byte = &mut slot[idx / 2]; 36 | if idx % 2 == 0 { 37 | *byte & 0x0F 38 | } else { 39 | (*byte >> 4) & 0x0F 40 | } 41 | } 42 | 43 | #[inline] 44 | fn store_counter(slot: &mut [u8], idx: usize, val: u8) { 45 | let b = &mut slot[idx / 2]; 46 | if idx % 2 == 0 { 47 | *b = (*b & 0xF0) | (val & 0x0F); 48 | } else { 49 | *b = (*b & 0x0F) | ((val & 0x0F) << 4); 50 | } 51 | } 52 | 53 | /// Record an access for the 64-bit key. 54 | pub fn admit_record(&mut self, key: u64) { 55 | for i in 0..self.depth { 56 | let idx = self.hash_i(key, i); 57 | let slot = &mut self.tables[i]; 58 | let mut c = Self::load_counter(slot, idx); 59 | if c < 15 { 60 | c += 1; 61 | } 62 | Self::store_counter(slot, idx, c); 63 | } 64 | } 65 | 66 | /// Estimate frequency for the key (min of counters).
67 | pub fn estimate(&self, key: u64) -> u8 { 68 | let mut minv = 15u8; 69 | for i in 0..self.depth { 70 | let idx = self.hash_i(key, i); 71 | let slot = &self.tables[i]; 72 | let c = if idx / 2 < slot.len() { 73 | if idx % 2 == 0 { 74 | slot[idx / 2] & 0x0F 75 | } else { 76 | (slot[idx / 2] >> 4) & 0x0F 77 | } 78 | } else { 79 | 0 80 | }; 81 | if c < minv { 82 | minv = c; 83 | } 84 | } 85 | minv 86 | } 87 | 88 | /// Periodic aging to prevent counter saturation. Halves all counters. 89 | pub fn age(&mut self) { 90 | for t in self.tables.iter_mut() { 91 | for b in t.iter_mut() { 92 | let lo = (*b & 0x0F) >> 1; 93 | let hi = ((*b >> 4) & 0x0F) >> 1; 94 | *b = (hi << 4) | lo; 95 | } 96 | } 97 | } 98 | } 99 | 100 | #[cfg(test)] 101 | mod tests { 102 | use super::*; 103 | 104 | #[test] 105 | fn tiny_lfu_basic() { 106 | let mut f = TinyLFU::new(1024, 4); 107 | let k1 = 123u64; 108 | let k2 = 456u64; 109 | for _ in 0..8 { 110 | f.admit_record(k1); 111 | } 112 | for _ in 0..2 { 113 | f.admit_record(k2); 114 | } 115 | assert!(f.estimate(k1) >= f.estimate(k2)); 116 | f.age(); 117 | assert!(f.estimate(k1) >= f.estimate(k2)); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /docs/src/storage/disk_io.md: -------------------------------------------------------------------------------- 1 | # Disk I/O — Scheduler, io_uring Data Pages & WAL Runtime 2 | 3 | ## 1. Architecture 4 | 5 | - **Request Path**: foreground components enqueue `DiskRequest` objects via `DiskScheduler::{schedule_read, schedule_write, …}`. A dispatcher thread drains the global channel and distributes work round-robin to N io_uring workers. Each worker owns its own ring and file-descriptor cache, so once a request is forwarded, execution proceeds entirely off the foreground thread. 6 | - **Stable APIs**: `schedule_read(page_id)`, `schedule_write(page_id, Bytes)`, `schedule_read_pages(Vec<PageId>)`, `schedule_allocate()`, `schedule_deallocate(page_id)` — every call returns a channel the caller can block on or poll (a toy model of this request/reply flow is sketched after this list). 7 | - **Batch Reads**: `ReadPages` fans out per-page SQEs while a shared `BatchState` tracks completions. Even if the kernel completes I/O out of order, the caller receives a `Vec<Bytes>` that preserves the original page order. 8 |
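The sketch below models the request/reply flow with plain threads and `mpsc` channels standing in for the dispatcher and io_uring workers. `ToyScheduler` and its internals are invented names for illustration; only the shape of `schedule_read(page_id)` returning a waitable channel mirrors the real API.

```rust
use std::sync::mpsc::{channel, Receiver, Sender};
use std::thread;

type PageId = u32;
type PageBuf = Vec<u8>;

enum Request {
    Read(PageId, Sender<Result<PageBuf, String>>),
    Shutdown,
}

struct ToyScheduler {
    tx: Sender<Request>,
    worker: Option<thread::JoinHandle<()>>,
}

impl ToyScheduler {
    fn new() -> Self {
        let (tx, rx) = channel::<Request>();
        // Single "worker" standing in for the dispatcher + io_uring rings.
        let worker = thread::spawn(move || {
            while let Ok(req) = rx.recv() {
                match req {
                    Request::Read(page_id, reply) => {
                        // Pretend we read the page; real code hits the disk here.
                        let _ = reply.send(Ok(vec![page_id as u8; 8]));
                    }
                    Request::Shutdown => break,
                }
            }
        });
        Self { tx, worker: Some(worker) }
    }

    /// Mirrors the schedule_read(page_id) shape: returns a channel to wait on.
    fn schedule_read(&self, page_id: PageId) -> Receiver<Result<PageBuf, String>> {
        let (reply_tx, reply_rx) = channel();
        self.tx.send(Request::Read(page_id, reply_tx)).unwrap();
        reply_rx
    }
}

fn main() {
    let mut sched = ToyScheduler::new();
    let rx = sched.schedule_read(42);
    let page = rx.recv().unwrap().unwrap(); // block until the worker replies
    assert_eq!(page.len(), 8);
    sched.tx.send(Request::Shutdown).unwrap();
    sched.worker.take().unwrap().join().unwrap();
}
```

The caller is free to hold several receivers at once and poll them, which is how scans overlap multiple in-flight page reads.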
9 | ## 2. WAL Runtime (buffered I/O) 10 | 11 | - Dedicated WAL runtime threads handle sequential WAL appends/reads using buffered I/O. They now keep a per-thread cache of open segment files, eliminating repeated `open()`/`close()` on every log record. 12 | - Worker count defaults to `max(1, available_parallelism / 2)` but is tunable through `IOSchedulerConfig`. 13 | - Optional `sync` on a request triggers `sync_data` / `fdatasync` so `WalManager` can honour synchronous commit or checkpoint barriers. Data pages stay on the io_uring dataplane; WAL always uses buffered writes. 14 | 15 | ## 3. io_uring Backend (Linux) 16 | 17 | - Each worker owns an `IoUring` with configurable `queue_depth`, optional SQPOLL idle timeout, and a pool of registered fixed buffers sized to `PAGE_SIZE`. Workers submit SQEs asynchronously and drain CQEs in small batches to keep the ring warm. 18 | - Read batching relies on shared `BatchState` instances (`Rc<RefCell<BatchState>>`) so multi-page callers see ordered results without blocking the kernel on serialization. 19 | - Writes keep their payload alive until completion; if a fixed buffer slot is available we reuse it, otherwise we fall back to heap buffers. A companion `WriteState` tracks an optional `fdatasync` so the caller still observes exactly one `Result<()>` once all CQEs land. 20 | - Errors (short read/write, errno) are normalised into `QuillSQLError` values that flow back on the original channel. 21 | 22 | ## 4. Configuration 23 | 24 | - `config::IOSchedulerConfig` controls (a sketch of this struct follows the list): 25 | - `workers`: number of io_uring workers (default = available parallelism). 26 | - `wal_workers`: WAL runtime threads (default workers / 2). 27 | - `iouring_queue_depth`, `iouring_fixed_buffers`, `iouring_sqpoll_idle_ms`. 28 | - `fsync_on_write`: whether data-page writes also issue `fdatasync` (WAL sync is managed separately by `WalManager`). 29 |
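To make the knob set concrete, here is a sketch of what such a config struct can look like, with defaults inferred from the text above (workers = available parallelism, wal_workers = half of that, at least one). The field names follow the list; the exact types and default values in `config.rs` may differ.

```rust
use std::thread;

/// Illustrative shape of the I/O scheduler configuration.
#[derive(Debug, Clone)]
pub struct IOSchedulerConfig {
    pub workers: usize,
    pub wal_workers: usize,
    pub iouring_queue_depth: u32,
    pub iouring_fixed_buffers: usize,
    pub iouring_sqpoll_idle_ms: Option<u32>, // None disables SQPOLL
    pub fsync_on_write: bool,
}

impl Default for IOSchedulerConfig {
    fn default() -> Self {
        let workers = thread::available_parallelism().map(|n| n.get()).unwrap_or(1);
        Self {
            workers,
            wal_workers: (workers / 2).max(1), // "max(1, available_parallelism / 2)"
            iouring_queue_depth: 256,          // assumed default
            iouring_fixed_buffers: 64,         // assumed default
            iouring_sqpoll_idle_ms: None,
            fsync_on_write: false,
        }
    }
}
```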
30 | ## 5. Concurrency & Safety 31 | 32 | - Worker-local file descriptors plus positional I/O remove shared mutable state on the hot path. The new per-worker handle cache further reduces syscall overhead. 33 | - Shutdown sequence: enqueue `Shutdown`, dispatcher forwards it to every worker, each worker drains outstanding SQEs/CQEs, and finally dispatcher + workers are joined. 34 | - BufferPool and TableHeap integrate via the same scheduler channels; inflight guards 35 | prevent duplicate page fetches even when multiple scans touch adjacent pages. 36 | 37 | ## 6. Performance Notes 38 | 39 | - Random page access benefits from fewer syscalls and deeper outstanding queue depth than the blocking fallback. 40 | - Only the io_uring backend currently ships (Linux x86_64). A portable fallback remains future work. 41 | - For large sequential scans, rely on the buffer pool's sequential access pattern or add 42 | a custom iterator on top of `ReadPages` if you want to experiment with direct I/O. 43 | 44 | ## 7. Future Work 45 | 46 | - Queue-depth aware scheduling and CQE bulk harvesting. 47 | - Optional group commit (aggregate writes, single fsync) behind configuration. 48 | - Metrics hooks (queue depth, submit/complete throughput, latency percentiles, error codes). 49 | - Cross-platform fallback backend and richer prioritisation/throttling policies. 50 | - Control-plane knobs for throttling individual background workers. 51 | -------------------------------------------------------------------------------- /src/recovery/resource_manager.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::sync::{Arc, RwLock}; 3 | 4 | use once_cell::sync::Lazy; 5 | use std::sync::OnceLock; 6 | 7 | use crate::buffer::BufferManager; 8 | use crate::error::{QuillSQLError, QuillSQLResult}; 9 | use crate::recovery::wal::codec::{decode_page_write, ResourceManagerId, WalFrame}; 10 | use crate::recovery::Lsn; 11 | use crate::storage::disk_scheduler::DiskScheduler; 12 | 13 | #[derive(Clone)] 14 | pub struct RedoContext { 15 | pub disk_scheduler: Arc<DiskScheduler>, 16 | pub buffer_pool: Option<Arc<BufferManager>>, 17 | } 18 | 19 | #[derive(Clone)] 20 | pub struct UndoContext { 21 | pub disk_scheduler: Arc<DiskScheduler>, 22 | pub buffer_pool: Option<Arc<BufferManager>>, 23 | } 24 | 25 | pub trait ResourceManager: Send + Sync { 26 | fn redo(&self, frame: &WalFrame, ctx: &RedoContext) -> QuillSQLResult<usize>; 27 | fn undo(&self, frame: &WalFrame, ctx: &UndoContext) -> QuillSQLResult<()>; 28 | 29 | fn transaction_id(&self, _frame: &WalFrame) -> Option<TransactionId> { 30 | None 31 | } 32 | } 33 | 34 | static REGISTRY: Lazy<RwLock<HashMap<ResourceManagerId, Arc<dyn ResourceManager>>>> = 35 | Lazy::new(|| RwLock::new(HashMap::new())); 36 | 37 | pub fn register_resource_manager(id: ResourceManagerId, manager: Arc<dyn ResourceManager>) { 38 | let mut guard = REGISTRY 39 | .write() 40 | .expect("resource manager registry poisoned"); 41 | guard.insert(id, manager); 42 | } 43 | 44 | pub fn get_resource_manager(id: ResourceManagerId) -> Option<Arc<dyn ResourceManager>> { 45 | let guard = REGISTRY.read().expect("resource manager registry poisoned"); 46 | guard.get(&id).cloned() 47 | } 48 | 49 | #[derive(Default)] 50 | struct PageResourceManager; 51 | 52 | impl PageResourceManager { 53 | fn page_requires_redo( 54 | &self, 55 | ctx: &RedoContext, 56 | page_id: u32, 57 | record_lsn: Lsn, 58 | ) -> QuillSQLResult<bool> { 59 | if let Some(bpm) = &ctx.buffer_pool { 60 | match bpm.fetch_page_read(page_id) { 61 | Ok(guard) => Ok(guard.lsn() < record_lsn), 62 | Err(_) => Ok(true), 63 | } 64 | } else { 65 | Ok(true) 66 | } 67 | } 68 | 69 | fn redo_page_write( 70 | &self, 71 | ctx: &RedoContext, 72 | payload: crate::recovery::wal::codec::PageWritePayload, 73 | ) -> QuillSQLResult<()> { 74 | debug_assert_eq!(payload.page_image.len(), crate::buffer::PAGE_SIZE); 75 | let bytes = bytes::Bytes::from(payload.page_image); 76 | let rx = ctx.disk_scheduler.schedule_write(payload.page_id, bytes)?; 77 | rx.recv().map_err(|e| { 78 | QuillSQLError::Internal(format!("WAL recovery write recv failed: {}", e)) 79 | })??; 80 | Ok(()) 81 | } 82 | } 83 | 84 | impl ResourceManager for PageResourceManager { 85 | fn redo(&self, frame: &WalFrame, ctx: &RedoContext) -> QuillSQLResult<usize> { 86 | if frame.info != 0 { 87 | return Err(QuillSQLError::Internal(format!( 88 | "Unknown Page info kind: {}", 89 | frame.info 90 | ))); 91 | } 92 | let payload = decode_page_write(&frame.body)?; 93 | if !self.page_requires_redo(ctx, payload.page_id, frame.lsn)?
{ 94 | return Ok(0); 95 | } 96 | self.redo_page_write(ctx, payload)?; 97 | Ok(1) 98 | } 99 | 100 | fn undo(&self, _frame: &WalFrame, _ctx: &UndoContext) -> QuillSQLResult<()> { 101 | Ok(()) 102 | } 103 | } 104 | 105 | static DEFAULT_RESOURCE_MANAGERS: OnceLock<()> = OnceLock::new(); 106 | 107 | pub fn ensure_default_resource_managers_registered() { 108 | DEFAULT_RESOURCE_MANAGERS.get_or_init(|| { 109 | register_resource_manager( 110 | ResourceManagerId::Page, 111 | Arc::new(PageResourceManager::default()), 112 | ); 113 | crate::storage::heap_recovery::ensure_heap_resource_manager_registered(); 114 | crate::storage::index::index_recovery::ensure_index_resource_manager_registered(); 115 | }); 116 | } 117 | -------------------------------------------------------------------------------- /src/recovery/wal/buffer.rs: -------------------------------------------------------------------------------- 1 | use crate::recovery::wal::Lsn; 2 | use crate::utils::ring_buffer::ConcurrentRingBuffer; 3 | 4 | use super::record::WalRecord; 5 | use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; 6 | 7 | #[derive(Debug)] 8 | pub struct WalBuffer { 9 | queue: ConcurrentRingBuffer<WalRecord>, 10 | len: AtomicUsize, 11 | bytes: AtomicUsize, 12 | last_enqueued_end: AtomicU64, 13 | } 14 | 15 | impl WalBuffer { 16 | pub fn with_capacity(capacity: usize) -> Self { 17 | Self { 18 | queue: ConcurrentRingBuffer::with_capacity(capacity.max(1)), 19 | len: AtomicUsize::new(0), 20 | bytes: AtomicUsize::new(0), 21 | last_enqueued_end: AtomicU64::new(0), 22 | } 23 | } 24 | 25 | pub fn push(&self, record: WalRecord) { 26 | let encoded_len = record.encoded_len() as usize; 27 | let end_lsn = record.end_lsn; 28 | let mut pending = record; 29 | loop { 30 | match self.queue.try_push(pending) { 31 | Ok(()) => break, 32 | Err(returned) => { 33 | pending = returned; 34 | std::hint::spin_loop(); 35 | } 36 | } 37 | } 38 | self.len.fetch_add(1, Ordering::Release); 39 | self.bytes.fetch_add(encoded_len, Ordering::Release); 40 | self.last_enqueued_end.store(end_lsn, Ordering::Release); 41 | } 42 | 43 | #[inline] 44 | pub fn len(&self) -> usize { 45 | self.len.load(Ordering::Acquire) 46 | } 47 | 48 | #[inline] 49 | pub fn bytes(&self) -> usize { 50 | self.bytes.load(Ordering::Acquire) 51 | } 52 | 53 | #[inline] 54 | pub fn highest_end_lsn(&self) -> Lsn { 55 | self.last_enqueued_end.load(Ordering::Acquire) 56 | } 57 | 58 | #[inline] 59 | pub fn is_empty(&self) -> bool { 60 | self.len.load(Ordering::Acquire) == 0 61 | } 62 | 63 | pub fn drain_until(&self, upto: Lsn) -> (Vec<WalRecord>, usize) { 64 | let mut drained = Vec::new(); 65 | let mut released = 0usize; 66 | loop { 67 | let Some(front) = self.queue.peek_clone() else { 68 | break; 69 | }; 70 | if front.end_lsn > upto { 71 | break; 72 | } 73 | if let Some(record) = self.queue.pop() { 74 | released += record.encoded_len() as usize; 75 | drained.push(record); 76 | } else { 77 | break; 78 | } 79 | } 80 | if !drained.is_empty() { 81 | self.len.fetch_sub(drained.len(), Ordering::Release); 82 | self.bytes.fetch_sub(released, Ordering::Release); 83 | } 84 | (drained, released) 85 | } 86 | 87 | pub fn pending(&self) -> Vec<WalRecord> { 88 | self.queue.snapshot() 89 | } 90 | } 91 | 92 | #[cfg(test)] 93 | mod tests { 94 | use super::*; 95 | use bytes::Bytes; 96 | 97 | fn make_record(start: Lsn, len: usize) -> WalRecord { 98 | WalRecord { 99 | start_lsn: start, 100 | end_lsn: start + len as u64, 101 | payload: Bytes::from(vec![0u8; len]), 102 | } 103 | } 104 | 105 | #[test] 106 | fn push_updates_length_and_bytes() { 107 | let
buffer = WalBuffer::with_capacity(8); 108 | buffer.push(make_record(0, 16)); 109 | buffer.push(make_record(16, 32)); 110 | 111 | assert_eq!(buffer.len(), 2); 112 | assert_eq!(buffer.bytes(), 48); 113 | assert_eq!(buffer.highest_end_lsn(), 48); 114 | } 115 | 116 | #[test] 117 | fn drain_until_releases_records_and_bytes() { 118 | let buffer = WalBuffer::with_capacity(8); 119 | buffer.push(make_record(0, 10)); 120 | buffer.push(make_record(10, 20)); 121 | buffer.push(make_record(30, 5)); 122 | 123 | let (drained, released) = buffer.drain_until(30); 124 | assert_eq!(drained.len(), 2); 125 | assert_eq!(released, 30); 126 | assert_eq!(buffer.len(), 1); 127 | assert_eq!(buffer.bytes(), 5); 128 | assert_eq!(buffer.highest_end_lsn(), 35); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/recovery/wal/codec/checkpoint.rs: -------------------------------------------------------------------------------- 1 | use crate::buffer::PageId; 2 | use crate::error::{QuillSQLError, QuillSQLResult}; 3 | use crate::recovery::Lsn; 4 | use crate::transaction::TransactionId; 5 | 6 | #[derive(Debug, Clone)] 7 | pub struct CheckpointPayload { 8 | pub last_lsn: Lsn, 9 | pub dirty_pages: Vec<PageId>, 10 | pub active_transactions: Vec<TransactionId>, 11 | /// Dirty Page Table: (page_id, recLSN) 12 | pub dpt: Vec<(PageId, Lsn)>, 13 | } 14 | 15 | pub fn encode_checkpoint(body: &CheckpointPayload) -> Vec<u8> { 16 | // Checkpoint (rmid=Checkpoint, info=0) 17 | // body: last_lsn(8) + dirty_pages_count(4) + dirty_pages[] + active_txns_count(4) + active_txns[] + dpt_count(4) + dpt[] 18 | let mut buf = Vec::new(); 19 | buf.extend_from_slice(&body.last_lsn.to_le_bytes()); 20 | buf.extend_from_slice(&(body.dirty_pages.len() as u32).to_le_bytes()); 21 | for page_id in &body.dirty_pages { 22 | buf.extend_from_slice(&page_id.to_le_bytes()); 23 | } 24 | buf.extend_from_slice(&(body.active_transactions.len() as u32).to_le_bytes()); 25 | for txn_id in &body.active_transactions { 26 | buf.extend_from_slice(&txn_id.to_le_bytes()); 27 | } 28 | buf.extend_from_slice(&(body.dpt.len() as u32).to_le_bytes()); 29 | for (page_id, rec_lsn) in &body.dpt { 30 | buf.extend_from_slice(&page_id.to_le_bytes()); 31 | buf.extend_from_slice(&rec_lsn.to_le_bytes()); 32 | } 33 | buf 34 | } 35 | 36 | pub fn decode_checkpoint(bytes: &[u8]) -> QuillSQLResult<CheckpointPayload> { 37 | if bytes.len() < 8 + 4 + 4 + 4 { 38 | return Err(QuillSQLError::Internal( 39 | "Checkpoint payload too short".to_string(), 40 | )); 41 | } 42 | let last_lsn = u64::from_le_bytes(bytes[0..8].try_into().unwrap()); 43 | let mut offset = 8; 44 | let dirty_pages_len = 45 | u32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap()) as usize; 46 | offset += 4; 47 | let mut dirty_pages = Vec::with_capacity(dirty_pages_len); 48 | for _ in 0..dirty_pages_len { 49 | if bytes.len() < offset + 4 { 50 | return Err(QuillSQLError::Internal( 51 | "Checkpoint dirty pages truncated".to_string(), 52 | )); 53 | } 54 | let page_id = u32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap()); 55 | offset += 4; 56 | dirty_pages.push(page_id); 57 | } 58 | if bytes.len() < offset + 4 { 59 | return Err(QuillSQLError::Internal( 60 | "Checkpoint active transactions truncated".to_string(), 61 | )); 62 | } 63 | let active_txn_len = u32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap()) as usize; 64 | offset += 4; 65 | let mut active_transactions = Vec::with_capacity(active_txn_len); 66 | for _ in 0..active_txn_len { 67 | if bytes.len() < offset + 8 { 68 | return
Err(QuillSQLError::Internal( 69 | "Checkpoint active transactions truncated".to_string(), 70 | )); 71 | } 72 | let txn_id = u64::from_le_bytes(bytes[offset..offset + 8].try_into().unwrap()); 73 | offset += 8; 74 | active_transactions.push(txn_id); 75 | } 76 | if bytes.len() < offset + 4 { 77 | return Err(QuillSQLError::Internal( 78 | "Checkpoint DPT length missing".to_string(), 79 | )); 80 | } 81 | let dpt_len = u32::from_le_bytes(bytes[offset..offset + 4].try_into().unwrap()) as usize; 82 | offset += 4; 83 | let required_dpt = offset + dpt_len * (4 + 8); 84 | if bytes.len() < required_dpt { 85 | return Err(QuillSQLError::Internal( 86 | "Checkpoint DPT truncated".to_string(), 87 | )); 88 | } 89 | let mut dpt = Vec::with_capacity(dpt_len); 90 | let mut cur = offset; 91 | for _ in 0..dpt_len { 92 | let pid = u32::from_le_bytes(bytes[cur..cur + 4].try_into().unwrap()); 93 | cur += 4; 94 | let lsn = u64::from_le_bytes(bytes[cur..cur + 8].try_into().unwrap()); 95 | cur += 8; 96 | dpt.push((pid, lsn)); 97 | } 98 | Ok(CheckpointPayload { 99 | last_lsn, 100 | dirty_pages, 101 | active_transactions, 102 | dpt, 103 | }) 104 | } 105 | --------------------------------------------------------------------------------