├── blog ├── part6.md ├── part4.md ├── part5.md ├── part2.md ├── part3.md └── part1.md ├── README.md ├── .gitignore ├── src ├── engine │ ├── mod.rs │ ├── operator.rs │ └── plan.rs ├── sql │ ├── mod.rs │ ├── ast.rs │ ├── tokenizer.rs │ └── parser.rs ├── main.rs ├── value.rs ├── db.rs ├── page.rs ├── pager.rs └── cursor.rs ├── Cargo.toml └── Cargo.lock /blog/part6.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # rqlite 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | target 3 | -------------------------------------------------------------------------------- /src/engine/mod.rs: -------------------------------------------------------------------------------- 1 | mod operator; 2 | pub mod plan; 3 | -------------------------------------------------------------------------------- /src/sql/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod ast; 2 | mod parser; 3 | mod tokenizer; 4 | 5 | pub use parser::{parse_create_statement, parse_statement}; 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rsqlite" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | anyhow = "1.0" 10 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by 
Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "anyhow" 7 | version = "1.0.75" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" 10 | 11 | [[package]] 12 | name = "rsqlite" 13 | version = "0.1.0" 14 | dependencies = [ 15 | "anyhow", 16 | ] 17 | -------------------------------------------------------------------------------- /src/engine/operator.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Context; 2 | 3 | use crate::{cursor::Scanner, value::OwnedValue}; 4 | 5 | #[derive(Debug)] 6 | pub enum Operator { 7 | SeqScan(SeqScan), 8 | } 9 | 10 | impl Operator { 11 | pub fn next_row(&mut self) -> anyhow::Result> { 12 | match self { 13 | Operator::SeqScan(s) => s.next_row(), 14 | } 15 | } 16 | } 17 | 18 | #[derive(Debug)] 19 | pub struct SeqScan { 20 | fields: Vec, 21 | scanner: Scanner, 22 | row_buffer: Vec, 23 | } 24 | 25 | impl SeqScan { 26 | pub fn new(fields: Vec, scanner: Scanner) -> Self { 27 | let row_buffer = vec![OwnedValue::Null; fields.len()]; 28 | 29 | Self { 30 | fields, 31 | scanner, 32 | row_buffer, 33 | } 34 | } 35 | 36 | fn next_row(&mut self) -> anyhow::Result> { 37 | let Some(mut record) = self.scanner.next_record()? 
else { 38 | return Ok(None); 39 | }; 40 | 41 | for (i, &n) in self.fields.iter().enumerate() { 42 | self.row_buffer[i] = record.owned_field(n)?.context("missing record field")?; 43 | } 44 | 45 | Ok(Some(&self.row_buffer)) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/sql/ast.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, Eq, PartialEq)] 2 | pub enum Statement { 3 | Select(SelectStatement), 4 | CreateTable(CreateTableStatement), 5 | } 6 | 7 | #[derive(Debug, Clone, Eq, PartialEq)] 8 | pub struct CreateTableStatement { 9 | pub name: String, 10 | pub columns: Vec, 11 | } 12 | 13 | #[derive(Debug, Clone, Eq, PartialEq)] 14 | pub struct ColumnDef { 15 | pub name: String, 16 | pub col_type: Type, 17 | } 18 | 19 | #[derive(Debug, Clone, Eq, PartialEq)] 20 | pub enum Type { 21 | Integer, 22 | Real, 23 | Text, 24 | Blob, 25 | } 26 | 27 | #[derive(Debug, Clone, Eq, PartialEq)] 28 | pub struct SelectStatement { 29 | pub core: SelectCore, 30 | } 31 | 32 | #[derive(Debug, Clone, Eq, PartialEq)] 33 | pub struct SelectCore { 34 | pub result_columns: Vec, 35 | pub from: SelectFrom, 36 | } 37 | 38 | #[derive(Debug, Clone, Eq, PartialEq)] 39 | pub enum ResultColumn { 40 | Star, 41 | Expr(ExprResultColumn), 42 | } 43 | 44 | #[derive(Debug, Clone, Eq, PartialEq)] 45 | pub struct ExprResultColumn { 46 | pub expr: Expr, 47 | pub alias: Option, 48 | } 49 | 50 | #[derive(Debug, Clone, Eq, PartialEq)] 51 | pub enum Expr { 52 | Column(Column), 53 | } 54 | 55 | #[derive(Debug, Clone, Eq, PartialEq)] 56 | pub struct Column { 57 | pub name: String, 58 | } 59 | 60 | #[derive(Debug, Clone, Eq, PartialEq)] 61 | pub enum SelectFrom { 62 | Table(String), 63 | } 64 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io::{BufRead, Write, 
stdin}; 2 | 3 | use anyhow::Context; 4 | 5 | mod cursor; 6 | mod db; 7 | mod engine; 8 | mod page; 9 | mod pager; 10 | mod sql; 11 | mod value; 12 | 13 | fn main() -> anyhow::Result<()> { 14 | let database = db::Db::from_file(std::env::args().nth(1).context("missing db file")?)?; 15 | cli(database) 16 | } 17 | 18 | fn cli(mut db: db::Db) -> anyhow::Result<()> { 19 | print_flushed("rqlite> ")?; 20 | 21 | let mut line_buffer = String::new(); 22 | 23 | while stdin().lock().read_line(&mut line_buffer).is_ok_and(|n| n > 0) { // read_line returns Ok(0) at EOF: stop instead of spinning forever on an empty buffer 24 | match line_buffer.trim() { 25 | ".exit" => break, 26 | ".tables" => display_tables(&mut db)?, 27 | stmt => eval_query(&db, stmt)?, 28 | } 29 | 30 | print_flushed("\nrqlite> ")?; 31 | 32 | line_buffer.clear(); 33 | } 34 | 35 | Ok(()) 36 | } 37 | 38 | fn display_tables(db: &mut db::Db) -> anyhow::Result<()> { 39 | for table in &db.tables_metadata { 40 | print!("{} ", &table.name) 41 | } 42 | Ok(()) 43 | } 44 | 45 | fn print_flushed(s: &str) -> anyhow::Result<()> { 46 | print!("{s}"); 47 | std::io::stdout().flush().context("flush stdout") 48 | } 49 | 50 | fn eval_query(db: &db::Db, query: &str) -> anyhow::Result<()> { 51 | let parsed_query = sql::parse_statement(query, false)?; 52 | let mut op = engine::plan::Planner::new(db).compile(&parsed_query)?; 53 | 54 | while let Some(values) = op.next_row()? 
{ 55 | let formated = values 56 | .iter() 57 | .map(ToString::to_string) 58 | .collect::>() 59 | .join("|"); 60 | 61 | println!("{formated}"); 62 | } 63 | 64 | Ok(()) 65 | } 66 | -------------------------------------------------------------------------------- /src/value.rs: -------------------------------------------------------------------------------- 1 | use std::{borrow::Cow, rc::Rc}; 2 | 3 | #[derive(Debug, Clone)] 4 | pub enum Value<'p> { 5 | Null, 6 | String(Cow<'p, str>), 7 | Blob(Cow<'p, [u8]>), 8 | Int(i64), 9 | Float(f64), 10 | } 11 | 12 | impl Value<'_> { 13 | pub fn as_str(&self) -> Option<&str> { 14 | if let Value::String(s) = self { 15 | Some(s.as_ref()) 16 | } else { 17 | None 18 | } 19 | } 20 | 21 | pub fn as_int(&self) -> Option { 22 | if let Value::Int(i) = self { 23 | Some(*i) 24 | } else { 25 | None 26 | } 27 | } 28 | } 29 | 30 | #[derive(Debug, Clone)] 31 | pub enum OwnedValue { 32 | Null, 33 | String(Rc), 34 | Blob(Rc>), 35 | Int(i64), 36 | Float(f64), 37 | } 38 | 39 | impl<'p> From> for OwnedValue { 40 | fn from(value: Value<'p>) -> Self { 41 | match value { 42 | Value::Null => Self::Null, 43 | Value::Int(i) => Self::Int(i), 44 | Value::Float(f) => Self::Float(f), 45 | Value::Blob(b) => Self::Blob(Rc::new(b.into_owned())), 46 | Value::String(s) => Self::String(Rc::new(s.into_owned())), 47 | } 48 | } 49 | } 50 | 51 | impl std::fmt::Display for OwnedValue { 52 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 53 | match self { 54 | OwnedValue::Null => write!(f, "null"), 55 | OwnedValue::String(s) => s.fmt(f), 56 | OwnedValue::Blob(items) => { 57 | write!( 58 | f, 59 | "{}", 60 | items 61 | .iter() 62 | .filter_map(|&n| char::from_u32(n as u32).filter(char::is_ascii)) 63 | .collect::() 64 | ) 65 | } 66 | OwnedValue::Int(i) => i.fmt(f), 67 | OwnedValue::Float(x) => x.fmt(f), 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/engine/plan.rs: 
-------------------------------------------------------------------------------- 1 | use anyhow::{bail, Context, Ok}; 2 | 3 | use crate::{ 4 | db::Db, 5 | sql::ast::{self, SelectFrom}, 6 | }; 7 | 8 | use super::operator::{Operator, SeqScan}; 9 | 10 | pub struct Planner<'d> { 11 | db: &'d Db, 12 | } 13 | 14 | impl<'d> Planner<'d> { 15 | pub fn new(db: &'d Db) -> Self { 16 | Self { db } 17 | } 18 | pub fn compile(self, statement: &ast::Statement) -> anyhow::Result { 19 | match statement { 20 | ast::Statement::Select(s) => self.compile_select(s), 21 | stmt => bail!("unsupported statement: {stmt:?}"), 22 | } 23 | } 24 | 25 | fn compile_select(self, select: &ast::SelectStatement) -> anyhow::Result { 26 | let SelectFrom::Table(table_name) = &select.core.from; 27 | 28 | let table = self 29 | .db 30 | .tables_metadata 31 | .iter() 32 | .find(|m| &m.name == table_name) 33 | .with_context(|| format!("invalid table name: {table_name}"))?; 34 | 35 | let mut columns = Vec::new(); 36 | 37 | for res_col in &select.core.result_columns { 38 | match res_col { 39 | ast::ResultColumn::Star => { 40 | for i in 0..table.columns.len() { 41 | columns.push(i); 42 | } 43 | } 44 | ast::ResultColumn::Expr(e) => { 45 | let ast::Expr::Column(col) = &e.expr; 46 | let (index, _) = table 47 | .columns 48 | .iter() 49 | .enumerate() 50 | .find(|(_, c)| c.name == col.name) 51 | .with_context(|| format!("invalid column name: {}", col.name))?; 52 | columns.push(index); 53 | } 54 | } 55 | } 56 | 57 | Ok(Operator::SeqScan(SeqScan::new( 58 | columns, 59 | self.db.scanner(table.first_page), 60 | ))) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/sql/tokenizer.rs: -------------------------------------------------------------------------------- 1 | use anyhow::bail; 2 | 3 | #[derive(Debug, Eq, PartialEq)] 4 | pub enum Token { 5 | Create, 6 | Table, 7 | Select, 8 | As, 9 | From, 10 | LPar, 11 | RPar, 12 | Star, 13 | Comma, 14 | SemiColon, 15 | 
Identifier(String), 16 | } 17 | 18 | impl Token { 19 | pub fn as_identifier(&self) -> Option<&str> { 20 | match self { 21 | Token::Identifier(ident) => Some(ident), 22 | _ => None, 23 | } 24 | } 25 | } 26 | 27 | pub fn tokenize(input: &str) -> anyhow::Result> { 28 | let mut tokens = Vec::new(); 29 | let mut chars = input.chars().peekable(); 30 | 31 | while let Some(c) = chars.next() { 32 | match c { 33 | '(' => tokens.push(Token::LPar), 34 | ')' => tokens.push(Token::RPar), 35 | '*' => tokens.push(Token::Star), 36 | ',' => tokens.push(Token::Comma), 37 | ';' => tokens.push(Token::SemiColon), 38 | c if c.is_whitespace() => continue, 39 | c if c.is_alphabetic() => { 40 | let mut ident = c.to_string().to_lowercase(); 41 | while let Some(cc) = chars.next_if(|&cc| cc.is_alphanumeric() || cc == '_') { 42 | ident.extend(cc.to_lowercase()); 43 | } 44 | 45 | match ident.as_str() { 46 | "create" => tokens.push(Token::Create), 47 | "table" => tokens.push(Token::Table), 48 | "select" => tokens.push(Token::Select), 49 | "as" => tokens.push(Token::As), 50 | "from" => tokens.push(Token::From), 51 | _ => tokens.push(Token::Identifier(ident)), 52 | } 53 | } 54 | _ => bail!("unexpected character: {}", c), 55 | } 56 | } 57 | 58 | Ok(tokens) 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | 65 | #[test] 66 | fn tokenize_select() { 67 | let input = "SeLect *, col as c FroM TableName_1;"; 68 | let expected = vec![ 69 | Token::Select, 70 | Token::Star, 71 | Token::Comma, 72 | Token::Identifier("col".to_string()), 73 | Token::As, 74 | Token::Identifier("c".to_string()), 75 | Token::From, 76 | Token::Identifier("tablename_1".to_string()), 77 | Token::SemiColon, 78 | ]; 79 | assert_eq!(tokenize(input).unwrap(), expected); 80 | } 81 | 82 | #[test] 83 | fn tokenize_invalid_char() { 84 | let input = "select @ from table;"; 85 | assert!(tokenize(input).is_err()); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/db.rs: 
-------------------------------------------------------------------------------- 1 | use std::{io::Read, path::Path}; 2 | 3 | use anyhow::Context; 4 | 5 | use crate::{ 6 | cursor::{Cursor, Scanner}, 7 | pager::{self, Pager}, 8 | sql::{self, ast}, 9 | }; 10 | 11 | #[derive(Debug, Clone)] 12 | pub struct TableMetadata { 13 | pub name: String, 14 | pub columns: Vec, 15 | pub first_page: usize, 16 | } 17 | 18 | impl TableMetadata { 19 | fn from_cursor(mut cursor: Cursor) -> anyhow::Result> { 20 | let type_value = cursor 21 | .field(0)? 22 | .context("missing type field") 23 | .context("invalid type field")?; 24 | 25 | if type_value.as_str() != Some("table") { 26 | return Ok(None); 27 | } 28 | 29 | let create_stmt = cursor 30 | .field(4)? 31 | .context("missing create statement") 32 | .context("invalid create statement")? 33 | .as_str() 34 | .context("table create statement should be a string")? 35 | .to_owned(); 36 | 37 | let create = sql::parse_create_statement(&create_stmt)?; 38 | 39 | let first_page = cursor 40 | .field(3)? 41 | .context("missing table first page")? 42 | .as_int() 43 | .context("table first page should be an integer")? 
as usize; 44 | 45 | Ok(Some(TableMetadata { 46 | name: create.name, 47 | columns: create.columns, 48 | first_page, 49 | })) 50 | } 51 | } 52 | 53 | pub struct Db { 54 | pub tables_metadata: Vec, 55 | pager: Pager, 56 | } 57 | 58 | impl Db { 59 | pub fn from_file(filename: impl AsRef) -> anyhow::Result { 60 | let mut file = std::fs::File::open(filename.as_ref()).context("open db file")?; 61 | 62 | let mut header_buffer = [0; pager::HEADER_SIZE]; 63 | file.read_exact(&mut header_buffer) 64 | .context("read db header")?; 65 | 66 | let header = pager::parse_header(&header_buffer).context("parse db header")?; 67 | 68 | let pager = Pager::new(header, file); 69 | 70 | let tables_metadata = Self::collect_tables_metadata(pager.clone())?; 71 | 72 | Ok(Db { 73 | pager, 74 | tables_metadata, 75 | }) 76 | } 77 | 78 | pub fn scanner(&self, page: usize) -> Scanner { 79 | Scanner::new(page, self.pager.clone()) 80 | } 81 | 82 | fn collect_tables_metadata(pager: Pager) -> anyhow::Result> { 83 | let mut metadata = Vec::new(); 84 | let mut scanner = Scanner::new(1, pager); 85 | 86 | while let Some(record) = scanner.next_record()? { 87 | if let Some(m) = TableMetadata::from_cursor(record)? 
{ 88 | metadata.push(m); 89 | } 90 | } 91 | 92 | Ok(metadata) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/page.rs: -------------------------------------------------------------------------------- 1 | use anyhow::bail; 2 | 3 | #[derive(Debug, Copy, Clone)] 4 | pub struct DbHeader { 5 | pub page_size: u32, 6 | pub page_reserved_size: u8, 7 | } 8 | 9 | impl DbHeader { 10 | pub fn usable_page_size(&self) -> usize { 11 | self.page_size as usize - (self.page_reserved_size as usize) 12 | } 13 | } 14 | 15 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] 16 | pub enum PageType { 17 | TableLeaf, 18 | TableInterior, 19 | } 20 | 21 | #[derive(Debug, Copy, Clone)] 22 | pub struct PageHeader { 23 | pub page_type: PageType, 24 | pub cell_count: u16, 25 | pub rightmost_pointer: Option, 26 | } 27 | 28 | impl PageHeader { 29 | pub fn byte_size(&self) -> usize { 30 | if self.rightmost_pointer.is_some() { 31 | 12 32 | } else { 33 | 8 34 | } 35 | } 36 | 37 | pub fn local_and_overflow_size( 38 | &self, 39 | db_header: &DbHeader, 40 | payload_size: usize, 41 | ) -> anyhow::Result<(usize, Option)> { 42 | let local = self.local_payload_size(db_header, payload_size)?; 43 | if local == payload_size { 44 | Ok((local, None)) 45 | } else { 46 | Ok((local, Some(payload_size.saturating_sub(local)))) 47 | } 48 | } 49 | 50 | fn local_payload_size( 51 | &self, 52 | db_header: &DbHeader, 53 | payload_size: usize, 54 | ) -> anyhow::Result { 55 | match self.page_type { 56 | PageType::TableInterior => bail!("no payload size for interior pages"), 57 | PageType::TableLeaf => { 58 | let usable = db_header.usable_page_size(); 59 | let max_size = usable - 35; 60 | if payload_size <= max_size { 61 | return Ok(payload_size); 62 | } 63 | let min_size = ((usable - 12) * 32 / 255) - 23; 64 | let k = min_size + ((payload_size - min_size) % (usable - 4)); 65 | let size = if k <= max_size { k } else { min_size }; 66 | Ok(size) 67 | } 68 | } 69 | } 70 | } 71 
| 72 | #[derive(Debug, Clone)] 73 | pub struct Page { 74 | pub header: PageHeader, 75 | pub cells: Vec, 76 | } 77 | 78 | impl Page { 79 | pub fn get(&self, n: usize) -> Option<&Cell> { 80 | self.cells.get(n) 81 | } 82 | } 83 | 84 | #[derive(Debug, Clone)] 85 | pub struct TableLeafCell { 86 | pub payload: Vec, 87 | pub first_overflow: Option, 88 | } 89 | 90 | #[derive(Debug, Clone)] 91 | pub struct TableInteriorCell { 92 | pub left_child_page: u32, 93 | } 94 | 95 | #[derive(Debug, Clone)] 96 | pub enum Cell { 97 | TableLeaf(TableLeafCell), 98 | TableInterior(TableInteriorCell), 99 | } 100 | 101 | impl From for Cell { 102 | fn from(cell: TableLeafCell) -> Self { 103 | Cell::TableLeaf(cell) 104 | } 105 | } 106 | 107 | impl From for Cell { 108 | fn from(cell: TableInteriorCell) -> Self { 109 | Cell::TableInterior(cell) 110 | } 111 | } 112 | 113 | #[derive(Debug, Clone)] 114 | pub struct OverflowPage { 115 | pub next: Option, 116 | pub payload: Vec, 117 | } 118 | -------------------------------------------------------------------------------- /src/sql/parser.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{bail, Context}; 2 | 3 | use crate::sql::{ 4 | ast::{ 5 | Column, ColumnDef, CreateTableStatement, Expr, ExprResultColumn, ResultColumn, SelectCore, 6 | SelectFrom, SelectStatement, Statement, Type, 7 | }, 8 | tokenizer::{self, Token}, 9 | }; 10 | 11 | #[derive(Debug)] 12 | struct ParserState { 13 | tokens: Vec, 14 | pos: usize, 15 | } 16 | 17 | impl ParserState { 18 | fn new(tokens: Vec) -> Self { 19 | Self { tokens, pos: 0 } 20 | } 21 | 22 | fn parse_statement(&mut self) -> anyhow::Result { 23 | match self.peek_next_token().context("unexpected end of input")? 
{ 24 | Token::Select => self.parse_select().map(Statement::Select), 25 | Token::Create => self.parse_create_table().map(Statement::CreateTable), 26 | token => bail!("unexpected token: {token:?}"), 27 | } 28 | } 29 | 30 | fn parse_create_table(&mut self) -> anyhow::Result { 31 | self.expect_eq(Token::Create)?; 32 | self.expect_eq(Token::Table)?; 33 | let name = self.expect_identifier()?.to_string(); 34 | self.expect_eq(Token::LPar)?; 35 | let mut columns = vec![self.parse_column_def()?]; 36 | while self.next_token_is(Token::Comma) { 37 | self.advance(); 38 | columns.push(self.parse_column_def()?); 39 | } 40 | self.expect_eq(Token::RPar)?; 41 | Ok(CreateTableStatement { name, columns }) 42 | } 43 | 44 | fn parse_column_def(&mut self) -> anyhow::Result { 45 | Ok(ColumnDef { 46 | name: self.expect_identifier()?.to_string(), 47 | col_type: self.parse_type()?, 48 | }) 49 | } 50 | 51 | fn parse_type(&mut self) -> anyhow::Result { 52 | let type_name = self.expect_identifier()?; 53 | let t = match type_name.to_lowercase().as_str() { 54 | "integer" => Type::Integer, 55 | "real" => Type::Real, 56 | "blob" => Type::Blob, 57 | "text" | "string" => Type::Text, 58 | _ => bail!("unsupported type: {type_name}"), 59 | }; 60 | Ok(t) 61 | } 62 | 63 | fn parse_select(&mut self) -> anyhow::Result { 64 | self.expect_eq(Token::Select)?; 65 | let result_columns = self.parse_result_columns()?; 66 | self.expect_eq(Token::From)?; 67 | let from = self.parse_select_from()?; 68 | Ok(SelectStatement { 69 | core: SelectCore { 70 | result_columns, 71 | from, 72 | }, 73 | }) 74 | } 75 | 76 | fn parse_select_from(&mut self) -> anyhow::Result { 77 | let table = self.expect_identifier()?; 78 | Ok(SelectFrom::Table(table.to_string())) 79 | } 80 | 81 | fn parse_result_columns(&mut self) -> anyhow::Result> { 82 | let mut result_coluns = vec![self.parse_result_column()?]; 83 | while self.next_token_is(Token::Comma) { 84 | self.advance(); 85 | result_coluns.push(self.parse_result_column()?); 86 | } 87 | 
Ok(result_coluns) 88 | } 89 | 90 | fn parse_result_column(&mut self) -> anyhow::Result { 91 | if self.peek_next_token()? == &Token::Star { 92 | self.advance(); 93 | return Ok(ResultColumn::Star); 94 | } 95 | 96 | Ok(ResultColumn::Expr(self.parse_expr_result_column()?)) 97 | } 98 | 99 | fn parse_expr_result_column(&mut self) -> anyhow::Result { 100 | let expr = self.parse_expr()?; 101 | let alias = if self.next_token_is(Token::As) { 102 | self.advance(); 103 | Some(self.expect_identifier()?.to_string()) 104 | } else { 105 | None 106 | }; 107 | Ok(ExprResultColumn { expr, alias }) 108 | } 109 | 110 | fn parse_expr(&mut self) -> anyhow::Result { 111 | Ok(Expr::Column(Column { 112 | name: self.expect_identifier()?.to_string(), 113 | })) 114 | } 115 | 116 | fn next_token_is(&self, expected: Token) -> bool { 117 | self.tokens.get(self.pos) == Some(&expected) 118 | } 119 | 120 | fn expect_identifier(&mut self) -> anyhow::Result<&str> { 121 | self.expect_matching(|t| matches!(t, Token::Identifier(_))) 122 | .map(|t| t.as_identifier().unwrap()) 123 | } 124 | 125 | fn expect_eq(&mut self, expected: Token) -> anyhow::Result<&Token> { 126 | self.expect_matching(|t| *t == expected) 127 | } 128 | 129 | fn expect_matching(&mut self, f: impl Fn(&Token) -> bool) -> anyhow::Result<&Token> { 130 | match self.next_token() { 131 | Some(token) if f(token) => Ok(token), 132 | Some(token) => bail!("unexpected token: {:?}", token), 133 | None => bail!("unexpected end of input"), 134 | } 135 | } 136 | 137 | fn peek_next_token(&self) -> anyhow::Result<&Token> { 138 | self.tokens.get(self.pos).context("unexpected end of input") 139 | } 140 | 141 | fn next_token(&mut self) -> Option<&Token> { 142 | let token = self.tokens.get(self.pos); 143 | if token.is_some() { 144 | self.pos += 1; 145 | } 146 | token 147 | } 148 | 149 | fn advance(&mut self) { 150 | self.pos += 1; 151 | } 152 | } 153 | 154 | pub fn parse_statement(input: &str, trailing_semicolon: bool) -> anyhow::Result { 155 | let tokens = 
tokenizer::tokenize(input)?; 156 | let mut state = ParserState::new(tokens); 157 | let statement = state.parse_statement()?; 158 | if trailing_semicolon { 159 | state.expect_eq(Token::SemiColon)?; 160 | } 161 | Ok(statement) 162 | } 163 | 164 | pub fn parse_create_statement(input: &str) -> anyhow::Result { 165 | match parse_statement(input, false)? { 166 | Statement::CreateTable(c) => Ok(c), 167 | Statement::Select(_) => bail!("expected a create statement"), 168 | } 169 | } 170 | 171 | #[cfg(test)] 172 | mod tests { 173 | use super::*; 174 | 175 | #[test] 176 | fn create_table() { 177 | let input = "create table table1(key integer, value text)"; 178 | let statement = parse_statement(input, false).unwrap(); 179 | assert_eq!( 180 | statement, 181 | Statement::CreateTable(CreateTableStatement { 182 | name: "table1".to_string(), 183 | columns: vec![ 184 | ColumnDef { 185 | name: "key".to_string(), 186 | col_type: Type::Integer, 187 | }, 188 | ColumnDef { 189 | name: "value".to_string(), 190 | col_type: Type::Text, 191 | } 192 | ] 193 | }) 194 | ) 195 | } 196 | 197 | #[test] 198 | fn select_star_from_table() { 199 | let input = "select * from table1"; 200 | let statement = parse_statement(input, false).unwrap(); 201 | assert_eq!( 202 | statement, 203 | Statement::Select(SelectStatement { 204 | core: SelectCore { 205 | result_columns: vec![ResultColumn::Star], 206 | from: SelectFrom::Table("table1".to_string()), 207 | }, 208 | }) 209 | ); 210 | } 211 | 212 | #[test] 213 | fn select_columns_from_table() { 214 | let input = "select col1 as first, col2 from table1;"; 215 | let statement = parse_statement(input, true).unwrap(); 216 | assert_eq!( 217 | statement, 218 | Statement::Select(SelectStatement { 219 | core: SelectCore { 220 | result_columns: vec![ 221 | ResultColumn::Expr(ExprResultColumn { 222 | expr: Expr::Column(Column { 223 | name: "col1".to_string() 224 | }), 225 | alias: Some("first".to_string()) 226 | }), 227 | ResultColumn::Expr(ExprResultColumn { 228 | expr: 
Expr::Column(Column { 229 | name: "col2".to_string() 230 | }), 231 | alias: None 232 | }), 233 | ], 234 | from: SelectFrom::Table("table1".to_string()), 235 | }, 236 | }) 237 | ); 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /src/pager.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashMap, 3 | io::{Read, Seek, SeekFrom}, 4 | sync::{Arc, Mutex, RwLock}, 5 | }; 6 | 7 | use anyhow::{Context, anyhow, bail}; 8 | 9 | use crate::page::{self, DbHeader, PageHeader}; 10 | 11 | pub const HEADER_SIZE: usize = 100; 12 | const HEADER_PREFIX: &[u8] = b"SQLite format 3\0"; 13 | const HEADER_PAGE_SIZE_OFFSET: usize = 16; 14 | const HEADER_PAGE_RESERVED_SIZE_OFFSET: usize = 20; 15 | 16 | const PAGE_MAX_SIZE: u32 = 65536; 17 | 18 | const PAGE_LEAF_TABLE_ID: u8 = 0x0d; 19 | const PAGE_INTERIOR_TABLE_ID: u8 = 0x05; 20 | 21 | const PAGE_CELL_COUNT_OFFSET: usize = 3; 22 | const PAGE_RIGHTMOST_POINTER_OFFSET: usize = 8; 23 | 24 | #[derive(Debug, Clone)] 25 | enum CachedPage { 26 | Page(Arc), 27 | Overflow(Arc), 28 | } 29 | 30 | impl From> for CachedPage { 31 | fn from(value: Arc) -> Self { 32 | CachedPage::Page(value) 33 | } 34 | } 35 | 36 | impl TryFrom for Arc { 37 | type Error = anyhow::Error; 38 | 39 | fn try_from(value: CachedPage) -> Result { 40 | if let CachedPage::Page(p) = value { 41 | Ok(p.clone()) 42 | } else { 43 | bail!("expected a regular page") 44 | } 45 | } 46 | } 47 | 48 | impl From> for CachedPage { 49 | fn from(value: Arc) -> Self { 50 | CachedPage::Overflow(value) 51 | } 52 | } 53 | 54 | impl TryFrom for Arc { 55 | type Error = anyhow::Error; 56 | 57 | fn try_from(value: CachedPage) -> Result { 58 | if let CachedPage::Overflow(o) = value { 59 | Ok(o.clone()) 60 | } else { 61 | bail!("expected an overflow page") 62 | } 63 | } 64 | } 65 | 66 | #[derive(Debug)] 67 | pub struct Pager { 68 | input: Arc>, 69 | pages: Arc>>, 70 | header: DbHeader, 
71 | } 72 | 73 | impl Pager { 74 | pub fn new(header: DbHeader, input: I) -> Self { 75 | Self { 76 | input: Arc::new(Mutex::new(input)), 77 | pages: Arc::default(), 78 | header, 79 | } 80 | } 81 | 82 | pub fn read_overflow(&self, n: usize) -> anyhow::Result> { 83 | self.load(n, |buffer| Ok(parse_overflow_page(buffer))) 84 | } 85 | 86 | pub fn read_page(&self, n: usize) -> anyhow::Result> { 87 | self.load(n, |buffer| parse_page(&self.header, buffer, n)) 88 | } 89 | 90 | fn load(&self, n: usize, f: impl Fn(&[u8]) -> anyhow::Result) -> anyhow::Result> 91 | where 92 | Arc: Into, 93 | CachedPage: TryInto, Error = anyhow::Error>, 94 | { 95 | { 96 | let read_pages = self 97 | .pages 98 | .read() 99 | .map_err(|_| anyhow!("poisoned page cache lock"))?; 100 | 101 | if let Some(page) = read_pages.get(&n).cloned() { 102 | return page.try_into(); 103 | } 104 | } 105 | 106 | let mut write_pages = self 107 | .pages 108 | .write() 109 | .map_err(|_| anyhow!("failed to acquire pager write lock"))?; 110 | 111 | if let Some(page) = write_pages.get(&n).cloned() { 112 | return page.try_into(); 113 | } 114 | 115 | let buffer = self.load_raw(n)?; 116 | let parsed = f(&buffer[0..self.header.usable_page_size()])?; 117 | let ptr = Arc::new(parsed); 118 | 119 | write_pages.insert(n, ptr.clone().into()); 120 | 121 | Ok(ptr) 122 | } 123 | 124 | fn load_raw(&self, n: usize) -> anyhow::Result> { 125 | let offset = n.saturating_sub(1) * self.header.page_size as usize; 126 | 127 | let mut input_guard = self 128 | .input 129 | .lock() 130 | .map_err(|_| anyhow!("poisoned pager mutex"))?; 131 | 132 | input_guard 133 | .seek(SeekFrom::Start(offset as u64)) 134 | .context("seek to page start")?; 135 | 136 | let mut buffer = vec![0; self.header.page_size as usize]; 137 | input_guard.read_exact(&mut buffer).context("read page")?; 138 | 139 | Ok(buffer) 140 | } 141 | } 142 | 143 | impl Clone for Pager { 144 | fn clone(&self) -> Self { 145 | Self { 146 | input: self.input.clone(), 147 | pages: 
self.pages.clone(), 148 | header: self.header, 149 | } 150 | } 151 | } 152 | 153 | fn parse_overflow_page(buffer: &[u8]) -> page::OverflowPage { 154 | let next = read_be_double_at(buffer, 0); 155 | page::OverflowPage { 156 | payload: buffer[4..].to_vec(), 157 | next: if next != 0 { Some(next as usize) } else { None }, 158 | } 159 | } 160 | 161 | pub fn parse_header(buffer: &[u8]) -> anyhow::Result { 162 | if !buffer.starts_with(HEADER_PREFIX) { 163 | let prefix = String::from_utf8_lossy(&buffer[..HEADER_PREFIX.len()]); 164 | anyhow::bail!("invalid header prefix: {prefix}"); 165 | } 166 | 167 | let page_size_raw = read_be_word_at(buffer, HEADER_PAGE_SIZE_OFFSET); 168 | let page_size = match page_size_raw { 169 | 1 => PAGE_MAX_SIZE, 170 | n if n.is_power_of_two() => n as u32, 171 | _ => anyhow::bail!("page size is not a power of 2: {}", page_size_raw), 172 | }; 173 | 174 | let page_reserved_size = buffer[HEADER_PAGE_RESERVED_SIZE_OFFSET]; 175 | 176 | Ok(page::DbHeader { 177 | page_size, 178 | page_reserved_size, 179 | }) 180 | } 181 | 182 | fn parse_page(db_header: &DbHeader, buffer: &[u8], page_num: usize) -> anyhow::Result { 183 | let ptr_offset = if page_num == 1 { HEADER_SIZE as u16 } else { 0 }; 184 | let content_buffer = &buffer[ptr_offset as usize..]; 185 | let header = parse_page_header(content_buffer)?; 186 | let cell_pointers = parse_cell_pointers( 187 | &content_buffer[header.byte_size()..], 188 | header.cell_count as usize, 189 | ptr_offset, 190 | ); 191 | 192 | let cells_parsing_fn = match header.page_type { 193 | page::PageType::TableLeaf => parse_table_leaf_cell, 194 | page::PageType::TableInterior => parse_table_interior_cell, 195 | }; 196 | 197 | let cells = parse_cells( 198 | db_header, 199 | &header, 200 | content_buffer, 201 | &cell_pointers, 202 | cells_parsing_fn, 203 | )?; 204 | 205 | Ok(page::Page { header, cells }) 206 | } 207 | 208 | fn parse_cells( 209 | db_header: &DbHeader, 210 | header: &PageHeader, 211 | buffer: &[u8], 212 | 
cell_pointers: &[u16], 213 | parse_fn: impl Fn(&DbHeader, &PageHeader, &[u8]) -> anyhow::Result, 214 | ) -> anyhow::Result> { 215 | cell_pointers 216 | .iter() 217 | .map(|&ptr| parse_fn(db_header, header, &buffer[ptr as usize..])) 218 | .collect() 219 | } 220 | 221 | fn parse_table_leaf_cell( 222 | db_header: &DbHeader, 223 | header: &PageHeader, 224 | mut buffer: &[u8], 225 | ) -> anyhow::Result { 226 | let (n, size) = read_varint_at(buffer, 0); 227 | buffer = &buffer[n as usize..]; 228 | 229 | let (n, _) = read_varint_at(buffer, 0); 230 | buffer = &buffer[n as usize..]; 231 | 232 | let (local_size, overflow_size) = header.local_and_overflow_size(db_header, size as usize)?; 233 | let first_overflow = overflow_size.map(|_| read_be_double_at(buffer, local_size) as usize); 234 | 235 | let payload = buffer[..local_size].to_vec(); 236 | 237 | Ok(page::TableLeafCell { 238 | payload, 239 | first_overflow, 240 | } 241 | .into()) 242 | } 243 | 244 | fn parse_table_interior_cell( 245 | _: &DbHeader, 246 | _: &PageHeader, 247 | buffer: &[u8], 248 | ) -> anyhow::Result { 249 | Ok(page::TableInteriorCell { 250 | left_child_page: read_be_double_at(buffer, 0), 251 | } 252 | .into()) 253 | } 254 | 255 | fn parse_page_header(buffer: &[u8]) -> anyhow::Result { 256 | let (page_type, rightmost_ptr) = match buffer[0] { 257 | PAGE_LEAF_TABLE_ID => (page::PageType::TableLeaf, false), 258 | PAGE_INTERIOR_TABLE_ID => (page::PageType::TableInterior, true), 259 | _ => anyhow::bail!("unknown page type: {}", buffer[0]), 260 | }; 261 | 262 | let cell_count = read_be_word_at(buffer, PAGE_CELL_COUNT_OFFSET); 263 | 264 | let rightmost_pointer = if rightmost_ptr { 265 | Some(read_be_double_at(buffer, PAGE_RIGHTMOST_POINTER_OFFSET)) 266 | } else { 267 | None 268 | }; 269 | 270 | Ok(page::PageHeader { 271 | page_type, 272 | cell_count, 273 | rightmost_pointer, 274 | }) 275 | } 276 | 277 | fn parse_cell_pointers(buffer: &[u8], n: usize, ptr_offset: u16) -> Vec { 278 | let mut pointers = 
Vec::with_capacity(n); 279 | for i in 0..n { 280 | pointers.push(read_be_word_at(buffer, 2 * i) - ptr_offset); 281 | } 282 | pointers 283 | } 284 | 285 | pub fn read_varint_at(buffer: &[u8], mut offset: usize) -> (u8, i64) { 286 | let mut size = 0; 287 | let mut result = 0; 288 | 289 | while size < 9 { 290 | let current_byte = buffer[offset] as i64; 291 | if size == 8 { 292 | result = (result << 8) | current_byte; 293 | } else { 294 | result = (result << 7) | (current_byte & 0b0111_1111); 295 | } 296 | 297 | offset += 1; 298 | size += 1; 299 | 300 | if current_byte & 0b1000_0000 == 0 { 301 | break; 302 | } 303 | } 304 | 305 | (size, result) 306 | } 307 | 308 | pub fn read_be_double_at(input: &[u8], offset: usize) -> u32 { 309 | u32::from_be_bytes(input[offset..offset + 4].try_into().unwrap()) 310 | } 311 | 312 | fn read_be_word_at(input: &[u8], offset: usize) -> u16 { 313 | u16::from_be_bytes(input[offset..offset + 2].try_into().unwrap()) 314 | } 315 | 316 | #[cfg(test)] 317 | mod test { 318 | use super::*; 319 | 320 | #[test] 321 | fn short_varint() { 322 | let buffer = [0b0000_0001]; 323 | assert_eq!(read_varint_at(&buffer, 0), (1, 1)); 324 | } 325 | 326 | #[test] 327 | fn middle_varint() { 328 | let buffer = [0b1000_0001, 0b0111_1111]; 329 | assert_eq!(read_varint_at(&buffer, 0), (2, 255)); 330 | } 331 | 332 | #[test] 333 | fn long_varint() { 334 | let buffer = [ 335 | 0b1000_0000, 336 | 0b1111_1111, 337 | 0b1000_0000, 338 | 0b1000_0000, 339 | 0b1000_0000, 340 | 0b1000_0000, 341 | 0b1000_0000, 342 | 0b1000_0000, 343 | 0b0110_1101, 344 | ]; 345 | assert_eq!( 346 | read_varint_at(&buffer, 0), 347 | ( 348 | 9, 349 | 0b00000001_11111100_00000000_00000000_00000000_00000000_00000000_01101101, 350 | ) 351 | ); 352 | } 353 | 354 | #[test] 355 | fn minus_one() { 356 | let buffer = [ 357 | 0b1111_1111, 358 | 0b1111_1111, 359 | 0b1111_1111, 360 | 0b1111_1111, 361 | 0b1111_1111, 362 | 0b1111_1111, 363 | 0b1111_1111, 364 | 0b1111_1111, 365 | 0b1111_1111, 366 | ]; 367 | 
assert_eq!(read_varint_at(&buffer, 0), (9, -1)); 368 | } 369 | } 370 | -------------------------------------------------------------------------------- /src/cursor.rs: -------------------------------------------------------------------------------- 1 | use std::{borrow::Cow, sync::Arc}; 2 | 3 | use anyhow::Context; 4 | 5 | use crate::{ 6 | page::{Cell, Page, PageType}, 7 | pager::Pager, 8 | value::{OwnedValue, Value}, 9 | }; 10 | 11 | #[derive(Debug, Copy, Clone)] 12 | pub enum RecordFieldType { 13 | Null, 14 | I8, 15 | I16, 16 | I24, 17 | I32, 18 | I48, 19 | I64, 20 | Float, 21 | Zero, 22 | One, 23 | String(usize), 24 | Blob(usize), 25 | } 26 | 27 | #[derive(Debug, Clone)] 28 | pub struct RecordField { 29 | pub offset: usize, 30 | pub field_type: RecordFieldType, 31 | } 32 | 33 | impl RecordField { 34 | pub fn end_offset(&self) -> usize { 35 | let size = match self.field_type { 36 | RecordFieldType::Null => 0, 37 | RecordFieldType::I8 => 1, 38 | RecordFieldType::I16 => 2, 39 | RecordFieldType::I24 => 3, 40 | RecordFieldType::I32 => 4, 41 | RecordFieldType::I48 => 5, 42 | RecordFieldType::I64 => 8, 43 | RecordFieldType::Float => 8, 44 | RecordFieldType::Zero => 0, 45 | RecordFieldType::One => 0, 46 | RecordFieldType::String(size) | RecordFieldType::Blob(size) => size, 47 | }; 48 | 49 | self.offset + size 50 | } 51 | } 52 | 53 | #[derive(Debug, Clone)] 54 | pub struct RecordHeader { 55 | pub fields: Vec, 56 | } 57 | 58 | fn parse_record_header(mut buffer: &[u8]) -> anyhow::Result { 59 | let (varint_size, header_length) = crate::pager::read_varint_at(buffer, 0); 60 | buffer = &buffer[varint_size as usize..header_length as usize]; 61 | 62 | let mut fields = Vec::new(); 63 | let mut current_offset = header_length as usize; 64 | 65 | while !buffer.is_empty() { 66 | let (discriminant_size, discriminant) = crate::pager::read_varint_at(buffer, 0); 67 | buffer = &buffer[discriminant_size as usize..]; 68 | 69 | let (field_type, field_size) = match discriminant { 70 | 0 => 
(RecordFieldType::Null, 0), 71 | 1 => (RecordFieldType::I8, 1), 72 | 2 => (RecordFieldType::I16, 2), 73 | 3 => (RecordFieldType::I24, 3), 74 | 4 => (RecordFieldType::I32, 4), 75 | 5 => (RecordFieldType::I48, 6), 76 | 6 => (RecordFieldType::I64, 8), 77 | 7 => (RecordFieldType::Float, 8), 78 | 8 => (RecordFieldType::Zero, 0), 79 | 9 => (RecordFieldType::One, 0), 80 | n if n >= 12 && n % 2 == 0 => { 81 | let size = ((n - 12) / 2) as usize; 82 | (RecordFieldType::Blob(size), size) 83 | } 84 | n if n >= 13 && n % 2 == 1 => { 85 | let size = ((n - 13) / 2) as usize; 86 | (RecordFieldType::String(size), size) 87 | } 88 | n => anyhow::bail!("unsupported field type: {}", n), 89 | }; 90 | 91 | fields.push(RecordField { 92 | offset: current_offset, 93 | field_type, 94 | }); 95 | 96 | current_offset += field_size; 97 | } 98 | 99 | Ok(RecordHeader { fields }) 100 | } 101 | 102 | #[derive(Debug)] 103 | pub struct Cursor { 104 | header: RecordHeader, 105 | payload: Vec, 106 | pager: Pager, 107 | next_overflow_page: Option, 108 | } 109 | 110 | impl Cursor { 111 | pub fn owned_field(&mut self, n: usize) -> anyhow::Result> { 112 | Ok(self.field(n)?.map(Into::into)) 113 | } 114 | 115 | pub fn field(&mut self, n: usize) -> anyhow::Result> { 116 | let Some(record_field) = self.header.fields.get(n) else { 117 | return Ok(None); 118 | }; 119 | 120 | let end_offset = record_field.end_offset(); 121 | 122 | if end_offset > (self.payload.len() - 1) 123 | && let Some(overflow_page) = self.next_overflow_page 124 | { 125 | let overflow_size = end_offset.saturating_sub(self.payload.len()); 126 | let (next_overflow, overflow_data) = OverflowScanner::new(self.pager.clone()) 127 | .read(overflow_page, overflow_size) 128 | .context("read overflow page")?; 129 | self.next_overflow_page = next_overflow; 130 | self.payload.extend_from_slice(&overflow_data); 131 | } 132 | 133 | let value = match record_field.field_type { 134 | RecordFieldType::Null => Some(Value::Null), 135 | RecordFieldType::I8 => 
Some(Value::Int(read_i8_at(&self.payload, record_field.offset))), 136 | RecordFieldType::I16 => { 137 | Some(Value::Int(read_i16_at(&self.payload, record_field.offset))) 138 | } 139 | RecordFieldType::I24 => { 140 | Some(Value::Int(read_i24_at(&self.payload, record_field.offset))) 141 | } 142 | RecordFieldType::I32 => { 143 | Some(Value::Int(read_i32_at(&self.payload, record_field.offset))) 144 | } 145 | RecordFieldType::I48 => { 146 | Some(Value::Int(read_i48_at(&self.payload, record_field.offset))) 147 | } 148 | RecordFieldType::I64 => { 149 | Some(Value::Int(read_i64_at(&self.payload, record_field.offset))) 150 | } 151 | RecordFieldType::Float => Some(Value::Float(read_f64_at( 152 | &self.payload, 153 | record_field.offset, 154 | ))), 155 | RecordFieldType::String(length) => { 156 | let value = std::str::from_utf8( 157 | &self.payload[record_field.offset..record_field.offset + length], 158 | ) 159 | .expect("invalid utf8"); 160 | Some(Value::String(Cow::Borrowed(value))) 161 | } 162 | RecordFieldType::Blob(length) => { 163 | let value = &self.payload[record_field.offset..record_field.offset + length]; 164 | Some(Value::Blob(Cow::Borrowed(value))) 165 | } 166 | RecordFieldType::One => Some(Value::Int(1)), 167 | RecordFieldType::Zero => Some(Value::Int(0)), 168 | }; 169 | 170 | Ok(value) 171 | } 172 | } 173 | 174 | // Reads a serial-type-1 value: a signed two's-complement 8-bit integer. 175 | fn read_i8_at(input: &[u8], offset: usize) -> i64 { 176 | input[offset] as i8 as i64 // cast through i8 so negative values sign-extend 177 | } 178 | 179 | fn read_i16_at(input: &[u8], offset: usize) -> i64 { 180 | i16::from_be_bytes(input[offset..offset + 2].try_into().unwrap()) as i64 181 | } 182 | 183 | // Reads a serial-type-3 value: a signed big-endian 24-bit integer. A 3-byte slice 184 | // cannot try_into a [u8; 4] (from_be_bytes would panic), so assemble the value 185 | // manually, then sign-extend bit 23 via the << 8 >> 8 arithmetic-shift pair. 186 | fn read_i24_at(input: &[u8], offset: usize) -> i64 { 187 | ((((input[offset] as i32) << 16) | ((input[offset + 1] as i32) << 8) | (input[offset + 2] as i32)) << 8 >> 8) as i64 188 | } 189 | 190 | fn read_i32_at(input: &[u8], offset: usize) -> i64 { 191 | i32::from_be_bytes(input[offset..offset + 4].try_into().unwrap()) as i64 192 | } 193 | 194 | fn read_i48_at(input: &[u8], offset: usize) -> i64 { 195 |
i64::from_be_bytes(input[offset..offset + 6].try_into().unwrap()) & 0x0000FFFFFFFFFFFF 192 | } 193 | 194 | fn read_i64_at(input: &[u8], offset: usize) -> i64 { 195 | i64::from_be_bytes(input[offset..offset + 8].try_into().unwrap()) 196 | } 197 | 198 | fn read_f64_at(input: &[u8], offset: usize) -> f64 { 199 | f64::from_be_bytes(input[offset..offset + 8].try_into().unwrap()) 200 | } 201 | 202 | #[derive(Debug)] 203 | pub struct PositionedPage { 204 | pub page: Arc, 205 | pub cell: usize, 206 | } 207 | 208 | impl PositionedPage { 209 | pub fn next_cell(&mut self) -> Option<&Cell> { 210 | let cell = self.page.get(self.cell); 211 | self.cell += 1; 212 | cell 213 | } 214 | 215 | pub fn next_page(&mut self) -> Option { 216 | if self.page.header.page_type == PageType::TableInterior 217 | && self.cell == self.page.cells.len() 218 | { 219 | self.cell += 1; 220 | self.page.header.rightmost_pointer 221 | } else { 222 | None 223 | } 224 | } 225 | } 226 | 227 | #[derive(Debug)] 228 | pub struct Scanner { 229 | initial_page: usize, 230 | page_stack: Vec, 231 | pager: Pager, 232 | } 233 | 234 | impl Scanner { 235 | pub fn new(page: usize, pager: Pager) -> Scanner { 236 | Scanner { 237 | initial_page: page, 238 | page_stack: Vec::new(), 239 | pager, 240 | } 241 | } 242 | 243 | pub fn next_record(&mut self) -> anyhow::Result> { 244 | loop { 245 | match self.next_elem() { 246 | Ok(Some(ScannerElem::Cursor(cursor))) => return Ok(Some(cursor)), 247 | Ok(Some(ScannerElem::Page(page_num))) => { 248 | let new_page = self.pager.read_page(page_num as usize)?.clone(); 249 | self.page_stack.push(PositionedPage { 250 | page: new_page, 251 | cell: 0, 252 | }); 253 | } 254 | Ok(None) if self.page_stack.len() > 1 => { 255 | self.page_stack.pop(); 256 | } 257 | Ok(None) => return Ok(None), 258 | Err(e) => return Err(e), 259 | } 260 | } 261 | } 262 | 263 | fn next_elem(&mut self) -> anyhow::Result> { 264 | let pager = self.pager.clone(); 265 | 266 | let Some(page) = self.current_page()? 
else { 267 | return Ok(None); 268 | }; 269 | 270 | if let Some(page) = page.next_page() { 271 | return Ok(Some(ScannerElem::Page(page))); 272 | } 273 | 274 | let Some(cell) = page.next_cell() else { 275 | return Ok(None); 276 | }; 277 | 278 | match cell { 279 | Cell::TableLeaf(cell) => { 280 | let header = parse_record_header(&cell.payload)?; 281 | Ok(Some(ScannerElem::Cursor(Cursor { 282 | header, 283 | payload: cell.payload.clone(), 284 | pager, 285 | next_overflow_page: cell.first_overflow, 286 | }))) 287 | } 288 | Cell::TableInterior(cell) => Ok(Some(ScannerElem::Page(cell.left_child_page))), 289 | } 290 | } 291 | 292 | fn current_page(&mut self) -> anyhow::Result> { 293 | if self.page_stack.is_empty() { 294 | let page = match self.pager.read_page(self.initial_page) { 295 | Ok(page) => page.clone(), 296 | Err(e) => return Err(e), 297 | }; 298 | 299 | self.page_stack.push(PositionedPage { page, cell: 0 }); 300 | } 301 | 302 | Ok(self.page_stack.last_mut()) 303 | } 304 | } 305 | 306 | #[derive(Debug)] 307 | enum ScannerElem { 308 | Page(u32), 309 | Cursor(Cursor), 310 | } 311 | 312 | #[derive(Debug)] 313 | struct OverflowScanner { 314 | pager: Pager, 315 | } 316 | 317 | impl OverflowScanner { 318 | pub fn new(pager: Pager) -> Self { 319 | Self { pager } 320 | } 321 | 322 | pub fn read(&self, first_page: usize, size: usize) -> anyhow::Result<(Option, Vec)> { 323 | let mut next_page = Some(first_page); 324 | let mut buffer = Vec::with_capacity(size); 325 | 326 | while buffer.len() < size 327 | && let Some(next) = next_page 328 | { 329 | let overflow = self.pager.read_overflow(next)?; 330 | next_page = overflow.next; 331 | buffer.extend_from_slice(&overflow.payload); 332 | } 333 | 334 | Ok((next_page, buffer)) 335 | } 336 | } 337 | -------------------------------------------------------------------------------- /blog/part4.md: -------------------------------------------------------------------------------- 1 | ### Build your own SQLite, Part 4: reading tables 
metadata 2 | 3 | As we saw in the [opening post](/build-your-own-sqlite-part-1-listing-tables), 4 | SQLite stores metadata about tables in a special "schema table" starting on page 1. 5 | We've been reading records from this table to list the tables in the current database, 6 | but before we can start evaluating SQL queries against user-defined tables, we need to 7 | extract more information from the schema table. 8 | 9 | For each table, we need to know: 10 | 11 | * the table name 12 | * the root page 13 | * the name and type of each column 14 | 15 | The first two are very easy to extract, as they are directly stored in fields 1 and 3 16 | of the schema table's records. But column names and types will be a bit trickier, as they are 17 | not neatly separated into record fields, but are stored in a single field in the 18 | form of a `CREATE TABLE` statement that we'll need to parse. 19 | 20 | The complete source code is available 21 | on [GitHub](https://github.com/geoffreycopin/rqlite/tree/4e098ca03b814448eb1a2650d64cda12227e9300). 22 | 23 | ## Parsing `CREATE TABLE` statements 24 | 25 | The first step in extending our SQL parser to support `CREATE TABLE` statements it to 26 | add the necessary token types to the tokenizer. We'll support `CREATE TABLE` statements 27 | of the following form: 28 | 29 | ```sql 30 | CREATE TABLE table_name 31 | ( 32 | column1_name column1_type, 33 | column2_name column2_type, . 34 | . 35 | . 36 | ) 37 | ``` 38 | 39 | The following tokens are new and need to be added to the `Token` enum: `CREATE`, `TABLE`, `(`, `)`. 40 | 41 | ```diff 42 | // sql/tokenizer.rs 43 | 44 | #[derive(Debug, Eq, PartialEq)] 45 | pub enum Token { 46 | + Create, 47 | + Table, 48 | Select, 49 | As, 50 | From, 51 | + LPar, 52 | + RPar, 53 | Star, 54 | Comma, 55 | SemiColon, 56 | Identifier(String), 57 | } 58 | 59 | //[...] 
60 | 61 | pub fn tokenize(input: &str) -> anyhow::Result> { 62 | let mut tokens = Vec::new(); 63 | let mut chars = input.chars().peekable(); 64 | 65 | while let Some(c) = chars.next() { 66 | match c { 67 | + '(' => tokens.push(Token::LPar), 68 | + ')' => tokens.push(Token::RPar), 69 | '*' => tokens.push(Token::Star), 70 | ',' => tokens.push(Token::Comma), 71 | ';' => tokens.push(Token::SemiColon), 72 | c if c.is_whitespace() => continue, 73 | c if c.is_alphabetic() => { 74 | let mut ident = c.to_string().to_lowercase(); 75 | while let Some(cc) = chars.next_if(|&cc| cc.is_alphanumeric() || cc == '_') { 76 | ident.extend(cc.to_lowercase()); 77 | } 78 | 79 | match ident.as_str() { 80 | + "create" => tokens.push(Token::Create), 81 | + "table" => tokens.push(Token::Table), 82 | "select" => tokens.push(Token::Select), 83 | "as" => tokens.push(Token::As), 84 | "from" => tokens.push(Token::From), 85 | _ => tokens.push(Token::Identifier(ident)), 86 | } 87 | } 88 | _ => bail!("unexpected character: {}", c), 89 | } 90 | } 91 | 92 | Ok(tokens) 93 | } 94 | ``` 95 | 96 | Next, we need to extend our AST to represent the new statement type. 97 | Our representation will be based on the [SQLite documentation](https://www.sqlite.org/lang_createtable.html). 98 | 99 | ```diff 100 | // sql/ast.rs 101 | 102 | //[...] 103 | 104 | #[derive(Debug, Clone, Eq, PartialEq)] 105 | pub enum Statement { 106 | Select(SelectStatement), 107 | + CreateTable(CreateTableStatement), 108 | } 109 | + 110 | +#[derive(Debug, Clone, Eq, PartialEq)] 111 | +pub struct CreateTableStatement { 112 | + pub name: String, 113 | + pub columns: Vec, 114 | +} 115 | + 116 | +#[derive(Debug, Clone, Eq, PartialEq)] 117 | +pub struct ColumnDef { 118 | + pub name: String, 119 | + pub col_type: Type, 120 | +} 121 | + 122 | +#[derive(Debug, Clone, Eq, PartialEq)] 123 | +pub enum Type { 124 | + Integer, 125 | + Real, 126 | + Text, 127 | + Blob, 128 | +} 129 | 130 | //[...] 
131 | ``` 132 | 133 | Parsing types is straightforward: we can simply match the incoming identifier 134 | token with a predefined set of types. For now, we'll restrict ourselves to 135 | `INTEGER`, `REAL`, `TEXT`, `STRING`, and `BLOB`. 136 | Once our `parse_type` method is implemented, constructing `ColumnDef` nodes 137 | is trivial. 138 | 139 | ```rust 140 | // sql/parser.rs 141 | 142 | //[...] 143 | impl ParserState { 144 | // [...] 145 | fn parse_column_def(&mut self) -> anyhow::Result { 146 | Ok(ColumnDef { 147 | name: self.expect_identifier()?.to_string(), 148 | col_type: self.parse_type()?, 149 | }) 150 | } 151 | 152 | fn parse_type(&mut self) -> anyhow::Result { 153 | let type_name = self.expect_identifier()?; 154 | let t = match type_name.to_lowercase().as_str() { 155 | "integer" => Type::Integer, 156 | "real" => Type::Real, 157 | "blob" => Type::Blob, 158 | "text" | "string" => Type::Text, 159 | _ => bail!("unsupported type: {type_name}"), 160 | }; 161 | Ok(t) 162 | } 163 | // [...] 164 | } 165 | 166 | //[...] 167 | ``` 168 | 169 | In our implementation if the `parse_create_table` method, we'll parse column definitions 170 | using the same pattern as in the `parse_result_colums` method: 171 | 172 | ```rust 173 | // sql/parser.rs 174 | 175 | //[...] 176 | impl ParserState { 177 | // [...] 178 | fn parse_create_table(&mut self) -> anyhow::Result { 179 | self.expect_eq(Token::Create)?; 180 | self.expect_eq(Token::Table)?; 181 | let name = self.expect_identifier()?.to_string(); 182 | self.expect_eq(Token::LPar)?; 183 | let mut columns = vec![self.parse_column_def()?]; 184 | while self.next_token_is(Token::Comma) { 185 | self.advance(); 186 | columns.push(self.parse_column_def()?); 187 | } 188 | self.expect_eq(Token::RPar)?; 189 | Ok(CreateTableStatement { name, columns }) 190 | } 191 | // [...] 192 | } 193 | //[...] 194 | ``` 195 | 196 | Finally, we need to update the `parse_statement` method to handle the new statement type. 
197 | We'll also update the `parse_statement` utility function to make the semicolon terminator 198 | optional, as the `CREATE TABLE` statements stored in the schema table lack a trailing semicolon. 199 | 200 | ```diff 201 | // sql/parser.rs 202 | 203 | //[...] 204 | 205 | impl ParserState { 206 | // [...] 207 | 208 | fn parse_statement(&mut self) -> anyhow::Result { 209 | - Ok(ast::Statement::Select(self.parse_select()?)) 210 | + match self.peak_next_token().context("unexpected end of input")? { 211 | + Token::Select => self.parse_select().map(Statement::Select), 212 | + Token::Create => self.parse_create_table().map(Statement::CreateTable), 213 | + token => bail!("unexpected token: {token:?}"), 214 | + } 215 | } 216 | 217 | // [...] 218 | } 219 | 220 | // [...] 221 | 222 | -pub fn parse_statement(input: &str) -> anyhow::Result { 223 | +pub fn parse_statement(input: &str, trailing_semicolon: bool) -> anyhow::Result { 224 | let tokens = tokenizer::tokenize(input)?; 225 | let mut state = ParserState::new(tokens); 226 | let statement = state.parse_statement()?; 227 | + if trailing_semicolon { 228 | state.expect_eq(Token::SemiColon)?; 229 | + } 230 | Ok(statement) 231 | } 232 | 233 | +pub fn parse_create_statement( 234 | + input: &str, 235 | +) -> anyhow::Result { 236 | + match parse_statement(input, false)? { 237 | + Statement::CreateTable(c) => Ok(c), 238 | + Statement::Select(_) => bail!("expected a create statement"), 239 | + } 240 | +} 241 | ``` 242 | 243 | ## Reading metadata 244 | 245 | Now that we have the necessary building blocks to read table metadata, 246 | we can extend our `Database` struct to store this information. 247 | The `TableMetadata::from_cursor` method builds a `TableMetadata` struct 248 | from a `Cursor` object, which represents a record in the schema table. 249 | The create statement and first page are extracted from fields 4 and 3, respectively. 
250 | 251 | As records from the schema table contain informations about other kinds 252 | of objects, such as triggers, we check the `type` field at index 0 to ensure 253 | we're dealing with a table. 254 | 255 | Finally, in `Db::collect_metadata`, we iterate over all the records in the schema table, 256 | collecting table metadata for each table record we encounter. 257 | 258 | ```diff 259 | // db.rs 260 | 261 | +#[derive(Debug, Clone)] 262 | +pub struct TableMetadata { 263 | + pub name: String, 264 | + pub columns: Vec, 265 | + pub first_page: usize, 266 | +} 267 | 268 | +impl TableMetadata { 269 | + fn from_cursor(cursor: Cursor) -> anyhow::Result> { 270 | + let type_value = cursor 271 | + .field(0) 272 | + .context("missing type field") 273 | + .context("invalid type field")?; 274 | 275 | + if type_value.as_str() != Some("table") { 276 | + return Ok(None); 277 | + } 278 | 279 | + let create_stmt = cursor 280 | + .field(4) 281 | + .context("missing create statement") 282 | + .context("invalid create statement")? 283 | + .as_str() 284 | + .context("table create statement should be a string")? 285 | + .to_owned(); 286 | 287 | + let create = sql::parse_create_statement(&create_stmt)?; 288 | 289 | + let first_page = cursor 290 | + .field(3) 291 | + .context("missing table first page")? 292 | + .as_int() 293 | + .context("table first page should be an integer")? 
as usize; 294 | 295 | + Ok(Some(TableMetadata { 296 | + name: create.name, 297 | + columns: create.columns, 298 | + first_page, 299 | + })) 300 | + } 301 | +} 302 | 303 | pub struct Db { 304 | pub header: DbHeader, 305 | + pub tables_metadata: Vec, 306 | pager: Pager, 307 | } 308 | 309 | impl Db { 310 | pub fn from_file(filename: impl AsRef) -> anyhow::Result { 311 | let mut file = std::fs::File::open(filename.as_ref()).context("open db file")?; 312 | 313 | let mut header_buffer = [0; pager::HEADER_SIZE]; 314 | file.read_exact(&mut header_buffer) 315 | .context("read db header")?; 316 | 317 | let header = pager::parse_header(&header_buffer).context("parse db header")?; 318 | 319 | + let tables_metadata = Self::collect_tables_metadata(&mut Pager::new( 320 | + file.try_clone()?, 321 | + header.page_size as usize, 322 | + ))?; 323 | 324 | let pager = Pager::new(file, header.page_size as usize); 325 | 326 | Ok(Db { 327 | header, 328 | pager, 329 | + tables_metadata, 330 | }) 331 | } 332 | 333 | + fn collect_tables_metadata(pager: &mut Pager) -> anyhow::Result> { 334 | + let mut metadata = Vec::new(); 335 | + let mut scanner = Scanner::new(pager, 1); 336 | 337 | + while let Some(record) = scanner.next_record()? { 338 | + if let Some(m) = TableMetadata::from_cursor(record)? { 339 | + metadata.push(m); 340 | + } 341 | + } 342 | 343 | + Ok(metadata) 344 | + } 345 | 346 | // [...] 347 | } 348 | ``` 349 | 350 | Our initial implementation of the `.table` command can be updated to use the new metadata: 351 | 352 | ```diff 353 | // main.rs 354 | 355 | fn display_tables(db: &mut db::Db) -> anyhow::Result<()> { 356 | - let mut scanner = db.scanner(1); 357 | - 358 | - while let Some(mut record) = scanner.next_record()? 
{ 359 | - let type_value = record 360 | - .field(0) 361 | - .context("missing type field") 362 | - .context("invalid type field")?; 363 | 364 | - if type_value.as_str() == Some("table") { 365 | - let name_value = record 366 | - .field(1) 367 | - .context("missing name field") 368 | - .context("invalid name field")?; 369 | 370 | - print!("{} ", name_value.as_str().unwrap()); 371 | - } 372 | - } 373 | + for table in &db.tables_metadata { 374 | + print!("{} ", &table.name) 375 | + } 376 | 377 | Ok(()) 378 | } 379 | ``` 380 | 381 | ## Conclusion 382 | 383 | We've extended our SQL parser to support `CREATE TABLE` statements and used it to 384 | extract metadata from the schema table. By parsing the schema, we now have a 385 | way to understand the structure of tables in our database. 386 | 387 | In the next post, we'll leverage this metadata to build a query evaluator 388 | that can execute simple `SELECT` queries against user-defined tables, 389 | bringing us one step closer to a fully functional database engine. 390 | -------------------------------------------------------------------------------- /blog/part5.md: -------------------------------------------------------------------------------- 1 | ### Build your own SQLite, Part 5: Evaluating queries 2 | 3 | In the previous posts, we've explored the 4 | [SQLite file format](/build-your-own-sqlite-part-1-listing-tables) and built a 5 | simple [SQL parser](/build-your-own-sqlite-part-3-sql-parsing-101). It's time 6 | to put these pieces together and implement a query evaluator! 7 | In this post, we'll lay the groundwork for evaluating SQL queries and build a 8 | query evaluator that can handle basic SELECT statements. While our initial implementation 9 | won't support filtering, sorting, grouping, or joins yet, it will give us the 10 | foundation to add these features in future posts. 
11 | 12 | As usual, the complete source code for this post is available 13 | on [GitHub](https://github.com/geoffreycopin/rqlite/commit/c7dfeeea6956e209ccbd50a727c2b9352c246082). 14 | 15 | ## Setting up our test database 16 | 17 | Before we can evaluate queries, we need a database to query. We'll start by 18 | creating a simple database with a single table, `table1`, with two columns, 19 | `id` and `value`: 20 | 21 | ```bash 22 | sqlite3 queries_test.db 23 | sqlite> create table table1(id integer, value text); 24 | sqlite> insert into table1(id, value) values 25 | ...> (1, '11'), 26 | ...> (2, '12'), 27 | ...> (3, '13'); 28 | sqlite> .exit 29 | ``` 30 | 31 | ⚠️ You might be tempted to use an existing SQLite database to test your queries, 32 | but keep in mind that our implementation does not support overflow pages yet, 33 | so it might not be able to read the data from your database file. 34 | 35 | ## Making the pager shareable 36 | 37 | --- 38 | This section is specific to the Rust implementation. If you're following along 39 | with another language, you can safely skip it! 40 | 41 | --- 42 | 43 | Currently, our pager can only be used through an exclusive mutable reference. 44 | This was fine for our initial use cases, but as we start building more complex 45 | features, maintaining this restriction will constrain our design. 46 | We'll make the pager shareable by wrapping its inner mutable fields in an 47 | `Arc>` and `Arc>`. This will allow us to effectively clone the pager and 48 | use it from multiple places without running into borrow checker issues. 49 | At this stage of the project we could have chosen to use a simple `Rc>`, 50 | but we'll eventually need to support concurrent access to the pager, so we'll 51 | use thread-safe counterparts from the start. 
52 | 53 | ```diff 54 | // src/pager.rs 55 | 56 | - #[derive(Debug, Clone)] 57 | + #[derive(Debug)] 58 | pub struct Pager { 59 | - input: I, 60 | + input: Arc> 61 | page_size: usize, 62 | - pages: HashMap, 63 | + pages: Arc>>>, 64 | } 65 | ``` 66 | 67 | The `read_page` and `load_page` methods need to be updated accordingly: 68 | 69 | ```rust 70 | impl Pager { 71 | // [...] 72 | pub fn read_page(&self, n: usize) -> anyhow::Result> { 73 | { 74 | let read_pages = self 75 | .pages 76 | .read() 77 | .map_err(|_| anyhow!("failed to acquire pager read lock"))?; 78 | 79 | if let Some(page) = read_pages.get(&n) { 80 | return Ok(page.clone()); 81 | } 82 | } 83 | 84 | let mut write_pages = self 85 | .pages 86 | .write() 87 | .map_err(|_| anyhow!("failed to acquire pager write lock"))?; 88 | 89 | if let Some(page) = write_pages.get(&n) { 90 | return Ok(page.clone()); 91 | } 92 | 93 | let page = self.load_page(n)?; 94 | write_pages.insert(n, page.clone()); 95 | Ok(page) 96 | } 97 | 98 | fn load_page(&self, n: usize) -> anyhow::Result> { 99 | let offset = n.saturating_sub(1) * self.page_size; 100 | 101 | let mut input_guard = self 102 | .input 103 | .lock() 104 | .map_err(|_| anyhow!("failed to lock pager mutex"))?; 105 | 106 | input_guard 107 | .seek(SeekFrom::Start(offset as u64)) 108 | .context("seek to page start")?; 109 | 110 | let mut buffer = vec![0; self.page_size]; 111 | input_guard.read_exact(&mut buffer).context("read page")?; 112 | 113 | Ok(Arc::new(parse_page(&buffer, n)?)) 114 | } 115 | } 116 | ``` 117 | 118 | Two things to note regarding the `read_page` method: 119 | 120 | - the initial attempt to read the page from the cache is nested in a block to 121 | limit the scope of the read lock, ensuring that it is released before we try 122 | to acquire the write lock 123 | - after acquiring the write lock, we check again if the page is already in the 124 | cache, in case it was inserted in between the two lock acquisitions 125 | 126 | Similarly, we'll define an owned 
version of our `Value` enum that we'll use 127 | in the query evaluator: 128 | 129 | ```rust 130 | // src/value.rs 131 | 132 | // [...] 133 | 134 | #[derive(Debug, Clone)] 135 | pub enum OwnedValue { 136 | Null, 137 | String(Rc), 138 | Blob(Rc>), 139 | Int(i64), 140 | Float(f64), 141 | } 142 | 143 | impl<'p> From> for OwnedValue { 144 | fn from(value: Value<'p>) -> Self { 145 | match value { 146 | Value::Null => Self::Null, 147 | Value::Int(i) => Self::Int(i), 148 | Value::Float(f) => Self::Float(f), 149 | Value::Blob(b) => Self::Blob(Rc::new(b.into_owned())), 150 | Value::String(s) => Self::String(Rc::new(s.into_owned())), 151 | } 152 | } 153 | } 154 | 155 | impl std::fmt::Display for OwnedValue { 156 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 157 | match self { 158 | OwnedValue::Null => write!(f, "null"), 159 | OwnedValue::String(s) => s.fmt(f), 160 | OwnedValue::Blob(items) => { 161 | write!( 162 | f, 163 | "{}", 164 | items 165 | .iter() 166 | .filter_map(|&n| char::from_u32(n as u32).filter(char::is_ascii)) 167 | .collect::() 168 | ) 169 | } 170 | OwnedValue::Int(i) => i.fmt(f), 171 | OwnedValue::Float(x) => x.fmt(f), 172 | } 173 | } 174 | } 175 | ``` 176 | 177 | Finally, we'll enrich our `Cursor` struct with a method that returns the value 178 | of a field as an `OwnedValue`: 179 | 180 | ```rust 181 | // src/cursor.rs 182 | 183 | impl Cursor { 184 | // [...] 185 | pub fn owned_field(&self, n: usize) -> Option { 186 | self.field(n).map(Into::into) 187 | } 188 | // [...] 189 | } 190 | ``` 191 | 192 | ## Evaluating `SELECT` statements 193 | 194 | Our query engine will be composed of two main components: 195 | 196 | - an iterator-like `Operator` enum that represents nestable operations on the 197 | database, such as scanning a table or filtering rows. Our initial implementation 198 | will only contain a `SeqScan` operator that yields all rows from a table. 
199 | - a `Planner` struct that takes a parsed SQL query and produces an `Operator` that 200 | can be evaluated to produce the query result. 201 | 202 | Let's start by defining the `Operator` enum: 203 | 204 | ```rust 205 | // src/engine/operator.rs 206 | use anyhow::Context; 207 | 208 | use crate::{cursor::Scanner, value::OwnedValue}; 209 | 210 | #[derive(Debug)] 211 | pub enum Operator { 212 | SeqScan(SeqScan), 213 | } 214 | 215 | impl Operator { 216 | pub fn next_row(&mut self) -> anyhow::Result> { 217 | match self { 218 | Operator::SeqScan(s) => s.next_row(), 219 | } 220 | } 221 | } 222 | ``` 223 | 224 | The result of evaluating a query will be obtained by repeatedly calling the 225 | `next_row` method on the `Operator` until it returns `None`. Each value 226 | in the returned slice corresponds to a column in the query result. 227 | 228 | The `SeqScan` struct will be responsible for scanning a table and yielding 229 | its rows: 230 | 231 | ```rust 232 | // src/engine/operator.rs 233 | 234 | // [...] 235 | 236 | #[derive(Debug)] 237 | pub struct SeqScan { 238 | fields: Vec, 239 | scanner: Scanner, 240 | row_buffer: Vec, 241 | } 242 | 243 | impl SeqScan { 244 | pub fn new(fields: Vec, scanner: Scanner) -> Self { 245 | let row_buffer = vec![OwnedValue::Null; fields.len()]; 246 | 247 | Self { 248 | fields, 249 | scanner, 250 | row_buffer, 251 | } 252 | } 253 | 254 | fn next_row(&mut self) -> anyhow::Result> { 255 | let Some(record) = self.scanner.next_record()? else { 256 | return Ok(None); 257 | }; 258 | 259 | for (i, &n) in self.fields.iter().enumerate() { 260 | self.row_buffer[i] = record.owned_field(n).context("missing record field")?; 261 | } 262 | 263 | Ok(Some(&self.row_buffer)) 264 | } 265 | } 266 | ``` 267 | 268 | The `SeqScan` struct is initialized with a list of field indices to read from 269 | each record and a `Scanner` that will yield the records for every row in the 270 | table to be scanned. 
As the number of fields to read is identical for every row, 271 | we can preallocate a buffer to store the values of the selected fields. 272 | The next_row method retrieves the next record from the scanner, extracts 273 | the requested fields (specified by their indices), and stores them in our buffer. 274 | 275 | Now that we have an `Operator` to evaluate `SELECT` statements, let's move on 276 | to the `Planner` struct that will produce the `Operator` from a parsed SQL query: 277 | 278 | ```rust 279 | // src/engine/plan.rs 280 | 281 | use anyhow::{bail, Context, Ok}; 282 | 283 | use crate::{ 284 | db::Db, 285 | sql::ast::{self, SelectFrom}, 286 | }; 287 | 288 | use super::operator::{Operator, SeqScan}; 289 | 290 | pub struct Planner<'d> { 291 | db: &'d Db, 292 | } 293 | 294 | impl<'d> Planner<'d> { 295 | pub fn new(db: &'d Db) -> Self { 296 | Self { db } 297 | } 298 | 299 | pub fn compile(self, statement: &ast::Statement) -> anyhow::Result { 300 | match statement { 301 | ast::Statement::Select(s) => self.compile_select(s), 302 | stmt => bail!("unsupported statement: {stmt:?}"), 303 | } 304 | } 305 | } 306 | ``` 307 | 308 | The `Planner` struct is initialized with a reference to the database and 309 | provides a `compile` method that takes a parsed SQL statement and returns 310 | the corresponding `Operator`. 311 | The `compile` method dispatches to a specific method for each type of SQL statement. 312 | 313 | Let's see how to build an `Operator` for a `SELECT` statement: 314 | 315 | ```rust 316 | 317 | // src/engine/plan.rs 318 | 319 | impl<'d> Planner<'d> { 320 | // [...] 
Finally, we create a `SeqScan` operator that will scan the entire table and yield
the selected fields for each row.
380 | 381 | fn eval_query(db: &db::Db, query: &str) -> anyhow::Result<()> { 382 | let parsed_query = sql::parse_statement(query, false)?; 383 | let mut op = engine::plan::Planner::new(db).compile(&parsed_query)?; 384 | 385 | while let Some(values) = op.next_row()? { 386 | let formated = values 387 | .iter() 388 | .map(ToString::to_string) 389 | .collect::>() 390 | .join("|"); 391 | 392 | println!("{formated}"); 393 | } 394 | 395 | Ok(()) 396 | } 397 | ``` 398 | 399 | This function creates a pipeline: it parses the SQL query, builds an 400 | `Operator` with our Planner, and then repeatedly calls next_row() on the resulting operator 401 | to retrieve and display each row of the result. 402 | 403 | The final step is to use this function in the REPL loop: 404 | 405 | ```diff 406 | // src/main.rs 407 | 408 | // [...] 409 | 410 | fn cli(mut db: db::Db) -> anyhow::Result<()> { 411 | print_flushed("rqlite> ")?; 412 | 413 | let mut line_buffer = String::new(); 414 | 415 | while stdin().lock().read_line(&mut line_buffer).is_ok() { 416 | match line_buffer.trim() { 417 | ".exit" => break, 418 | ".tables" => display_tables(&mut db)?, 419 | + stmt => eval_query(&db, stmt)?, 420 | - stmt => match sql::parse_statement(stmt, true) { 421 | - Ok(stmt) => { 422 | - println!("{:?}", stmt); 423 | - } 424 | - Err(e) => { 425 | - println!("Error: {}", e); 426 | - } 427 | - }, 428 | } 429 | 430 | print_flushed("\nrqlite> ")?; 431 | 432 | line_buffer.clear(); 433 | } 434 | 435 | Ok(()) 436 | } 437 | ``` 438 | 439 | Now we can run the REPL and evaluate some simple `SELECT` statements: 440 | 441 | ```bash 442 | cargo run -- queries_test.db 443 | rqlite> select * from table1; 444 | ``` 445 | 446 | If everything went well, you should see the following output: 447 | 448 | ```bash 449 | 1|11 450 | 2|12 451 | 3|13 452 | ``` 453 | 454 | ## Conclusion 455 | 456 | Our small database engine is starting to take shape! We can now parse and evaluate 457 | simple `SELECT` queries. 
But there's still a lot to cover before we can call it 458 | a fully functional database engine. 459 | In the next posts, we'll discover how to filter rows, read indexes, and implement 460 | sorting and grouping. 461 | -------------------------------------------------------------------------------- /blog/part2.md: -------------------------------------------------------------------------------- 1 | ### Build your own SQLite, Part 2: Scanning large tables 2 | 3 | In the previous post, we discovered the SQLite file format and implemented a toy version 4 | of the `.tables` command, allowing us to display the list of tables in a database. 5 | But our implementation has a jarring limitation: it assumes that all the data fits into the first 6 | page of the file. In this post, we'll discover how SQLite represents tables that are too large to fit 7 | into a single page, this will make our `.tables` command more useful, but also lay the groundwork for 8 | our query engine. 9 | 10 | ## Erratum 11 | 12 | If you're one of the early readers of the first post, and you coded along, a small mistake might 13 | have slipped into your code: in the `load_page` method of the `Pager` struct, there is 14 | no need to add `HEADER_SIZE` to the `offset` parameter. Here is the beginning of the corrected version: 15 | 16 | ```diff 17 | // src/pager.rs 18 | 19 | fn load_page(&mut self, n: usize) -> anyhow::Result { 20 | - let offset = HEADER_SIZE + n.saturating_sub(1) * self.page_size; 21 | + let offset = n.saturating_sub(1) * self.page_size; 22 | // the rest of the method stays the same 23 | // [...] 
- leaf pages, that contain the actual records
- interior pages, that store information about which page contains the records for which table.

Interior pages have the same high-level structure as leaf pages, with two key differences:

- instead of storing records, they store a tuple `(child_page_number, key)` where `child_page_number` is
a 32-bit unsigned integer representing the page number of the "root" page of a subtree that contains
records with keys lower or equal to `key`.
Cells in interior pages are logically ordered by `key` in ascending order.
We'll also add a `byte_size`
method that returns the size of the header, depending on whether the `rightmost_pointer` field is set or not,
and add a new variant to our `PageType` enum to represent interior pages.
124 | + } else { 125 | + None 126 | + }; 127 | 128 | Ok(page::PageHeader { 129 | page_type, 130 | first_freeblock, 131 | cell_count, 132 | cell_content_offset, 133 | fragmented_bytes_count, 134 | + rightmost_pointer, 135 | }) 136 | } 137 | ``` 138 | 139 | We decide whether to parse the `rightmost_pointer` field depending on the value of the `page_type` 140 | byte (`0x0d` for leaf pages, `0x05` for interior pages). 141 | 142 | Next, we'll update the `Page` struct to reflect the fact that both leaf and interior pages 143 | share the same structure, with the only difference being the content of the cells: 144 | 145 | ```diff 146 | // src/page.rs 147 | 148 | #[derive(Debug, Clone)] 149 | - pub struct TableLeafPage { 150 | + pub struct Page { 151 | pub header: PageHeader, 152 | pub cell_pointers: Vec, 153 | - pub cells: Vec, 154 | + pub cells: Vec, 155 | } 156 | 157 | - #[derive(Debug, Clone)] 158 | - pub enum Page { 159 | - TableLeaf(TableLeafPage), 160 | - } 161 | 162 | + #[derive(Debug, Clone)] 163 | + pub enum Cell { 164 | + TableLeaf(TableLeafCell), 165 | + TableInterior(TableInteriorCell), 166 | + } 167 | 168 | + impl From for Cell { 169 | + fn from(cell: TableLeafCell) -> Self { 170 | + Cell::TableLeaf(cell) 171 | + } 172 | + } 173 | 174 | + impl From for Cell { 175 | + fn from(cell: TableInteriorCell) -> Self { 176 | + Cell::TableInterior(cell) 177 | + } 178 | + } 179 | 180 | + pub struct TableInteriorCell { 181 | + pub left_child_page: u32, 182 | + pub key: i64, 183 | + } 184 | ``` 185 | 186 | This change calls for a major update of our parsing functions, reproduced below: 187 | 188 | ```rust 189 | // src/pager.rs 190 | 191 | fn parse_page(buffer: &[u8], page_num: usize) -> anyhow::Result { 192 | let ptr_offset = if page_num == 1 { HEADER_SIZE as u16 } else { 0 }; 193 | let content_buffer = &buffer[ptr_offset as usize..]; 194 | let header = parse_page_header(content_buffer)?; 195 | let cell_pointers = parse_cell_pointers( 196 | 
&content_buffer[header.byte_size()..], 197 | header.cell_count as usize, 198 | ptr_offset, 199 | ); 200 | 201 | let cells_parsing_fn = match header.page_type { 202 | page::PageType::TableLeaf => parse_table_leaf_cell, 203 | page::PageType::TableInterior => parse_table_interior_cell, 204 | }; 205 | 206 | let cells = parse_cells(content_buffer, &cell_pointers, cells_parsing_fn)?; 207 | 208 | Ok(page::Page { 209 | header, 210 | cell_pointers, 211 | cells, 212 | }) 213 | } 214 | 215 | fn parse_cells( 216 | buffer: &[u8], 217 | cell_pointers: &[u16], 218 | parse_fn: impl Fn(&[u8]) -> anyhow::Result, 219 | ) -> anyhow::Result> { 220 | cell_pointers 221 | .iter() 222 | .map(|&ptr| parse_fn(&buffer[ptr as usize..])) 223 | .collect() 224 | } 225 | 226 | fn parse_table_leaf_cell(mut buffer: &[u8]) -> anyhow::Result { 227 | let (n, size) = read_varint_at(buffer, 0); 228 | buffer = &buffer[n as usize..]; 229 | 230 | let (n, row_id) = read_varint_at(buffer, 0); 231 | buffer = &buffer[n as usize..]; 232 | 233 | let payload = buffer[..size as usize].to_vec(); 234 | 235 | Ok(page::TableLeafCell { 236 | size, 237 | row_id, 238 | payload, 239 | } 240 | .into()) 241 | } 242 | 243 | fn parse_table_interior_cell(mut buffer: &[u8]) -> anyhow::Result { 244 | let left_child_page = read_be_double_at(buffer, 0); 245 | buffer = &buffer[4..]; 246 | 247 | let (_, key) = read_varint_at(buffer, 0); 248 | 249 | Ok(page::TableInteriorCell { 250 | left_child_page, 251 | key, 252 | } 253 | .into()) 254 | } 255 | ``` 256 | 257 | ## Scanning logic 258 | 259 | Our scanning logic will need to be updated to handle interior pages. We can no longer 260 | simply iterate over the cells of a page and call it a day. Instead, we'll need to 261 | implement a depth-first algorithm that recursively explores the tree, starting from 262 | the root page. 
The `next_cell` method returns the content of the current cell and increments the cell index,
so calling it repeatedly will yield the content of all the cells in the page.

The `next_page` method is a bit more complex: it returns the `rightmost_pointer` of the current
page if it's an interior page and we just visited the last cell, otherwise
it returns `None`.

We'll also update our `Cursor` so that it owns its payload instead of borrowing it through a `Pager`:

```diff
326 | At every step of the walk, there are a few cases to consider: 327 | 328 | - if the current page is a leaf page and we haven't visited all the cells yet, 329 | we'll just have to build a `Cursor` with the current cell's payload and return it. 330 | - if the current page is an interior page, we'll push the next page (either from the 331 | current cell or the rightmost pointer) to the stack and continue the walk. 332 | - if we've visited all the cells of the current page, we'll pop the stack and continue 333 | the walk from the parent page. 334 | 335 | This logic is implemented in the new `Scanner` struct: 336 | 337 | ```rust 338 | // src/pager.rs 339 | 340 | #[derive(Debug)] 341 | pub struct Scanner<'p> { 342 | pager: &'p mut Pager, 343 | initial_page: usize, 344 | page_stack: Vec, 345 | } 346 | 347 | impl<'p> Scanner<'p> { 348 | pub fn new(pager: &'p mut Pager, page: usize) -> Scanner<'p> { 349 | Scanner { 350 | pager, 351 | initial_page: page, 352 | page_stack: Vec::new(), 353 | } 354 | } 355 | 356 | pub fn next_record(&mut self) -> anyhow::Result> { 357 | loop { 358 | match self.next_elem() { 359 | Ok(Some(ScannerElem::Cursor(cursor))) => return Ok(Some(cursor)), 360 | Ok(Some(ScannerElem::Page(page_num))) => { 361 | let new_page = self.pager.read_page(page_num as usize)?.clone(); 362 | self.page_stack.push(PositionedPage { 363 | page: new_page, 364 | cell: 0, 365 | }); 366 | } 367 | Ok(None) if self.page_stack.len() > 1 => { 368 | self.page_stack.pop(); 369 | } 370 | Ok(None) => return Ok(None), 371 | Err(e) => return Err(e), 372 | } 373 | } 374 | } 375 | 376 | fn next_elem(&mut self) -> anyhow::Result> { 377 | let Some(page) = self.current_page()? 
else { 378 | return Ok(None); 379 | }; 380 | 381 | if let Some(page) = page.next_page() { 382 | return Ok(Some(ScannerElem::Page(page))); 383 | } 384 | 385 | let Some(cell) = page.next_cell() else { 386 | return Ok(None); 387 | }; 388 | 389 | match cell { 390 | Cell::TableLeaf(cell) => { 391 | let header = parse_record_header(&cell.payload)?; 392 | Ok(Some(ScannerElem::Cursor(Cursor { 393 | header, 394 | payload: cell.payload.clone(), 395 | }))) 396 | } 397 | Cell::TableInterior(cell) => Ok(Some(ScannerElem::Page(cell.left_child_page))), 398 | } 399 | } 400 | 401 | fn current_page(&mut self) -> anyhow::Result> { 402 | if self.page_stack.is_empty() { 403 | let page = match self.pager.read_page(self.initial_page) { 404 | Ok(page) => page.clone(), 405 | Err(e) => return Err(e), 406 | }; 407 | 408 | self.page_stack.push(PositionedPage { page, cell: 0 }); 409 | } 410 | 411 | Ok(self.page_stack.last_mut()) 412 | } 413 | } 414 | 415 | #[derive(Debug)] 416 | enum ScannerElem { 417 | Page(u32), 418 | Cursor(Cursor), 419 | } 420 | ``` 421 | 422 | ## Putting it all together 423 | 424 | The only change that remains to be made is to update the `display_tables` function 425 | to account for the change in `next_record` signature: 426 | 427 | ```diff 428 | // src/main.rs 429 | 430 | fn display_tables(db: &mut db::Db) -> anyhow::Result<()> { 431 | let mut scanner = db.scanner(1); 432 | 433 | - while let Some(Ok(mut record)) = scanner.next_record() { 434 | + while let Some(mut record) = scanner.next_record()? { 435 | let type_value = record 436 | .field(0) 437 | .context("missing type field") 438 | .context("invalid type field")?; 439 | 440 | if type_value.as_str() == Some("table") { 441 | let name_value = record 442 | .field(1) 443 | .context("missing name field") 444 | .context("invalid name field")?; 445 | 446 | print!("{} ", name_value.as_str().unwrap()); 447 | } 448 | } 449 | 450 | Ok(()) 451 | } 452 | ``` 453 | 454 | We can now display our (long!) 
As the SQL dialect supported by SQLite is quite large and complex, we'll initially limit ourselves to
a subset that comprises only the `select` statement, in a stripped-down form. Only expressions
of the form `select <columns> from <table>` will be supported, where `<columns>` is either `*` or a
comma-separated list of column names (with an optional `as` alias),
Our SQL parser will follow a conventional two-step process: lexical analysis (or tokenization)
and syntax analysis (or parsing).
42 | 43 | ```rust 44 | // sql/tokenizer.rs 45 | #[derive(Debug, Eq, PartialEq)] 46 | pub enum Token { 47 | Select, 48 | As, 49 | From, 50 | Star, 51 | Comma, 52 | SemiColon, 53 | Identifier(String), 54 | } 55 | 56 | impl Token { 57 | pub fn as_identifier(&self) -> Option<&str> { 58 | match self { 59 | Token::Identifier(ident) => Some(ident), 60 | _ => None, 61 | } 62 | } 63 | } 64 | ``` 65 | 66 | We also define a utility function `as_identifier` that will return the string value of 67 | a token if it is an `Identifier`, and `None` otherwise. 68 | 69 | The logic of the tokenize function is quite simple: we iterate over the input string's 70 | characters, and based on the current character we decide which token to emit: 71 | 72 | - if the character matches a single-character token, we emit it immediately 73 | - if the character is a whitespace, it is discarded 74 | - finally, if the character is a letter, we start a new identifier token and keep accumulating 75 | characters until we reach a character that is not a valid identifier character. At this point, 76 | if the accumulated string is a keyword, we emit the corresponding token, otherwise, we emit 77 | a raw `Identifier` token. 
78 | 79 | ```rust 80 | // sql/tokenizer.rs 81 | use anyhow::bail; 82 | 83 | pub fn tokenize(input: &str) -> anyhow::Result> { 84 | let mut tokens = Vec::new(); 85 | let mut chars = input.chars().peekable(); 86 | 87 | while let Some(c) = chars.next() { 88 | match c { 89 | '*' => tokens.push(Token::Star), 90 | ',' => tokens.push(Token::Comma), 91 | ';' => tokens.push(Token::SemiColon), 92 | c if c.is_whitespace() => continue, 93 | c if c.is_alphabetic() => { 94 | let mut ident = c.to_string().to_lowercase(); 95 | while let Some(cc) = chars.next_if(|&cc| cc.is_alphanumeric() || cc == '_') { 96 | ident.extend(cc.to_lowercase()); 97 | } 98 | 99 | match ident.as_str() { 100 | "select" => tokens.push(Token::Select), 101 | "as" => tokens.push(Token::As), 102 | "from" => tokens.push(Token::From), 103 | _ => tokens.push(Token::Identifier(ident)), 104 | } 105 | } 106 | _ => return Err(anyhow::anyhow!("unexpected character: {}", c)), 107 | } 108 | } 109 | 110 | Ok(tokens) 111 | } 112 | ``` 113 | 114 | Since SQL is case-insensitive, all identifiers are normalized to lower case. 115 | 116 | ## Representing SQL statements 117 | 118 | Before we dive into the implementation of the parser, we need to decide how to 119 | represent SQL statements in our code. We'll settle on a conventional representation, 120 | based on the description of the SQL syntax in the SQLite documentation, and write 121 | the corresponding Rust types in a new module `sql::ast`. 
122 | 123 | ```rust 124 | // sql/ast.rs 125 | 126 | #[derive(Debug, Clone, Eq, PartialEq)] 127 | pub enum Statement { 128 | Select(SelectStatement), 129 | } 130 | 131 | #[derive(Debug, Clone, Eq, PartialEq)] 132 | pub struct SelectStatement { 133 | pub core: SelectCore, 134 | } 135 | 136 | #[derive(Debug, Clone, Eq, PartialEq)] 137 | pub struct SelectCore { 138 | pub result_columns: Vec, 139 | pub from: SelectFrom, 140 | } 141 | 142 | #[derive(Debug, Clone, Eq, PartialEq)] 143 | pub enum ResultColumn { 144 | Star, 145 | Expr(ExprResultColumn), 146 | } 147 | 148 | #[derive(Debug, Clone, Eq, PartialEq)] 149 | pub struct ExprResultColumn { 150 | pub expr: Expr, 151 | pub alias: Option, 152 | } 153 | 154 | #[derive(Debug, Clone, Eq, PartialEq)] 155 | pub enum Expr { 156 | Column(Column), 157 | } 158 | 159 | #[derive(Debug, Clone, Eq, PartialEq)] 160 | pub struct Column { 161 | pub name: String, 162 | } 163 | 164 | #[derive(Debug, Clone, Eq, PartialEq)] 165 | pub enum SelectFrom { 166 | Table(String), 167 | } 168 | ``` 169 | 170 | The following query: 171 | 172 | ```sql 173 | select col1 as first, col2 174 | from table 175 | ``` 176 | 177 | Will be parsed into the following rust structure: 178 | 179 | ```rust 180 | Statement::Select(SelectStatement { 181 | core: SelectCore { 182 | result_columns: vec![ 183 | ResultColumn::Expr(ExprResultColumn { 184 | expr: Expr::Column(Column { 185 | name: "col1".to_string() 186 | }), 187 | alias: Some("first".to_string()) 188 | }), 189 | ResultColumn::Expr(ExprResultColumn { 190 | expr: Expr::Column(Column { 191 | name: "col2".to_string() 192 | }), 193 | alias: None 194 | }), 195 | ], 196 | from: SelectFrom::Table("table".to_string()), 197 | }, 198 | }) 199 | ``` 200 | 201 | You may notice a few redundancies in this representation, such as the `Expr` enum 202 | that comprises a single variant. 
Parsing algorithms come in all shapes and sizes, and a full discussion of the topic
is beyond the scope of this article. The one we'll use here is called recursive descent
and is reasonably simple to understand and implement:

- for every node type, we'll define a function that tries to build the node from the current input
tokens, and fails if it is not possible. For example, we'll define a method that builds a `Column` node
by consuming an `Identifier` token, and fails if the current token is not an `Identifier` token.
- complex "nested" nodes are built by delegating the parsing of their child nodes to other functions.
For example, `ExprResultColumn` is built by parsing an `Expr` node and an optional `as` token followed
by an `Identifier` token.
- `next_token_is` checks if the current token is equal to the expected token
the next token without consuming it, 292 | and fails if there are no more tokens 293 | - `next_token` returns the current token and advances the parser's position 294 | - `advance` increments the parser's position 295 | 296 | Armed with these primitives, we can write our simplest parser function: `parse_expr`! 297 | As the only expressions that we support for now are identifiers, the parsing function 298 | only has to check that the current token is an `Identifier` token and build a `Expr` node 299 | from its value. 300 | 301 | ```rust 302 | // sql/parser.rs 303 | 304 | impl ParserState { 305 | //... 306 | fn parse_expr(&mut self) -> anyhow::Result { 307 | Ok(Expr::Column(Column { 308 | name: self.expect_identifier()?.to_string(), 309 | })) 310 | } 311 | //... 312 | } 313 | ``` 314 | 315 | A bit more involved, the `parse_expr_result_column` function parses terms of 316 | the form `columnName` or `columnName as alias`. It starts by parsing the 317 | initial `Expr` node (`columnName`, in our examples), then if the next 318 | token is `as`, it consumes it and parses the `Identifier` token that follows. 319 | 320 | ```rust 321 | // sql/parser.rs 322 | 323 | impl ParserState { 324 | //... 325 | fn parse_expr_result_column(&mut self) -> anyhow::Result { 326 | let expr = self.parse_expr()?; 327 | let alias = if self.next_token_is(Token::As) { 328 | self.advance(); 329 | Some(self.expect_identifier()?.to_string()) 330 | } else { 331 | None 332 | }; 333 | Ok(ExprResultColumn { expr, alias }) 334 | } 335 | //... 336 | } 337 | ``` 338 | 339 | `ResultColumn` can represent terms of the form described above, or `*` to represent 340 | all columns of a table. The `parse_result_column` function checks if the current token 341 | is `*`, and returns a `Star` node if it is. Otherwise, it delegates the parsing of the 342 | `ExprResultColumn` node to the `parse_expr_result_column` function. 343 | 344 | ```rust 345 | // sql/parser.rs 346 | 347 | impl ParserState { 348 | //... 
349 | fn parse_result_column(&mut self) -> anyhow::Result { 350 | if self.peak_next_token()? == &Token::Star { 351 | self.advance(); 352 | return Ok(ResultColumn::Star); 353 | } 354 | 355 | Ok(ResultColumn::Expr(self.parse_expr_result_column()?)) 356 | } 357 | //... 358 | } 359 | ``` 360 | 361 | Another interesting example is the `parse_result_colums` function, which parses 362 | a list of columns separated by commas. It starts by parsing the first column, 363 | then iterates over the following tokens as long as the token following 364 | a result column is a comma, accumulating the parsed columns in a vector. 365 | 366 | ```rust 367 | // sql/parser.rs 368 | 369 | impl ParserState { 370 | //... 371 | fn parse_result_columns(&mut self) -> anyhow::Result> { 372 | let mut result_coluns = vec![self.parse_result_column()?]; 373 | while self.next_token_is(Token::Comma) { 374 | self.advance(); 375 | result_coluns.push(self.parse_result_column()?); 376 | } 377 | Ok(result_coluns) 378 | } 379 | //... 380 | } 381 | ``` 382 | 383 | As you are probably getting the hang of it, implementing the remaining parsing 384 | functions can be a fun exercise. In any case, here is my implementation 385 | for reference: 386 | 387 | ```rust 388 | // sql/parser.rs 389 | 390 | impl ParserState { 391 | //... 392 | fn parse_statement(&mut self) -> anyhow::Result { 393 | Ok(Statement::Select(self.parse_select()?)) 394 | } 395 | 396 | fn parse_select(&mut self) -> anyhow::Result { 397 | self.expect_eq(Token::Select)?; 398 | let result_columns = self.parse_result_columns()?; 399 | self.expect_eq(Token::From)?; 400 | let from = self.parse_select_from()?; 401 | Ok(SelectStatement { 402 | core: SelectCore { 403 | result_columns, 404 | from, 405 | }, 406 | }) 407 | } 408 | 409 | fn parse_select_from(&mut self) -> anyhow::Result { 410 | let table = self.expect_identifier()?; 411 | Ok(SelectFrom::Table(table.to_string())) 412 | } 413 | //... 
414 | } 415 | ``` 416 | 417 | The final piece of the puzzle is a function that ties everything together, 418 | taking an input SQL string, tokenizing it, and parsing it into an AST: 419 | 420 | ```rust 421 | // sql/parser.rs 422 | 423 | //... 424 | 425 | pub fn parse_statement(input: &str) -> anyhow::Result { 426 | let tokens = tokenizer::tokenize(input)?; 427 | let mut state = ParserState::new(tokens); 428 | let statement = state.parse_statement()?; 429 | state.expect_eq(Token::SemiColon)?; 430 | Ok(statement) 431 | } 432 | ``` 433 | 434 | ## Putting it all together 435 | 436 | We've covered a lot of ground! Now is the time to test our parser on 437 | some actual SQL queries. To that end, let's alter our REPL loop 438 | to parse the input as an SQL statement if it does not match a known command, and 439 | print it. 440 | 441 | ```diff 442 | // src/main.rs 443 | 444 | + mod sql; 445 | 446 | //... 447 | 448 | fn cli(mut db: db::Db) -> anyhow::Result<()> { 449 | print_flushed("rqlite> ")?; 450 | 451 | let mut line_buffer = String::new(); 452 | 453 | while stdin().lock().read_line(&mut line_buffer).is_ok() { 454 | match line_buffer.trim() { 455 | ".exit" => break, 456 | ".tables" => display_tables(&mut db)?, 457 | + stmt => match sql::parse_statement(stmt) { 458 | + Ok(stmt) => { 459 | + println!("{:?}", stmt); 460 | + } 461 | + Err(e) => { 462 | + println!("Error: {}", e); 463 | + } 464 | + }, 465 | - _ => { 466 | - println!("Unrecognized command '{}'", line_buffer.trim()); 467 | - } 468 | } 469 | 470 | print_flushed("\nrqlite> ")?; 471 | 472 | line_buffer.clear(); 473 | } 474 | 475 | Ok(()) 476 | } 477 | 478 | //... 479 | ``` 480 | 481 | ## Conclusion 482 | 483 | Our database can read data and parse very simple SQL statements. 484 | In the next part of this series, we'll bridge the gap between these two functionalities 485 | and build a small query engine that compiles SQL queries into execution plans and 486 | executes these plans against the persisted data. 
487 | -------------------------------------------------------------------------------- /blog/part1.md: -------------------------------------------------------------------------------- 1 | ### Build your own SQLite, Part 1: Listing tables 2 | 3 | As developers, we use databases all the time. But how do they work? 4 | In this series, we'll try to answer that question by building our own 5 | SQLite-compatible database from scratch. 6 | 7 | Source code examples will be provided in Rust, but you are encouraged to 8 | follow along using your language of choice, as we won't be relying 9 | on many language-specific features or libraries. 10 | 11 | As an introduction, we'll implement the simplest version of the `tables` command, 12 | which lists the names of all the tables in a database. While this looks simple, we'll 13 | see that it requires us to make our first deep dive into the SQLite file format. 14 | 15 | ## Building the test database 16 | 17 | To keep things as simple as possible, let's build a minimalistic 18 | test database: 19 | 20 | ```bash 21 | sqlite3 minimal_test.db 22 | sqlite> create table table1(id integer); 23 | sqlite> create table table2(id integer); 24 | sqlite> .exit 25 | ``` 26 | 27 | This creates a database with two tables, `table1` and `table2`, each with a single 28 | column, `id`. We can verify this by running the `tables` command in the SQLite shell: 29 | 30 | ```bash 31 | sqlite3 minimal_test.db 32 | sqlite> .tables 33 | table1 table2 34 | sqlite> .exit 35 | ``` 36 | 37 | ## Bootstrapping the project 38 | 39 | Let's start by creating a new Rust project. 
We'll use the `cargo add` to add our only dependency 40 | for now, `anyhow`: 41 | 42 | ```bash 43 | cargo new rsqlite 44 | cd rsqlite 45 | cargo add anyhow 46 | ``` 47 | 48 | ## The SQLite file format 49 | 50 | ![](https://cdn.hashnode.com/res/hashnode/image/upload/v1721572171598/5c4195b6-5472-4ba1-826d-d8f5b6660527.png align=" 51 | center") 52 | 53 | SQLite databases are stored in a single file, the format of which is 54 | documented in the [SQLite File Format Specification](https://www.sqlite.org/fileformat.html). 55 | The file is divided into pages, with each page having the same size: a power of 2, between 56 | 512 and 65536 bytes. 57 | The first 100 bytes of the first page contain the database header, which includes 58 | information such as the page size and the file format version. In this first part, we'll only 59 | be interested in the page size. 60 | Pages can be of different types, but for this first article, we'll only be interested in 61 | `table btree leaf` pages, which store the actual table data. 62 | 63 | Our first task will be to implement a `Pager` struct that reads and caches pages from the 64 | database file. But before we do, we'll have to read the page size from the database header. 65 | Let's start by defining our `Header` struct: 66 | 67 | ```rust 68 | // src/page.rs 69 | #[derive(Debug, Copy, Clone)] 70 | pub struct DbHeader { 71 | pub page_size: u32, 72 | } 73 | ``` 74 | 75 | The header starts with the magic string `SQLite format 3\0`, followed by the page size 76 | encoded as a big-endian 2-byte integer at offset 16. 
With this information, we can 77 | implement a function that reads the header from a buffer: 78 | 79 | ```rust 80 | // src/pager.rs 81 | pub const HEADER_SIZE: usize = 100; 82 | const HEADER_PREFIX: &[u8] = b"SQLite format 3\0"; 83 | const HEADER_PAGE_SIZE_OFFSET: usize = 16; 84 | 85 | const PAGE_MAX_SIZE: u32 = 65536; 86 | 87 | pub fn parse_header(buffer: &[u8]) -> anyhow::Result { 88 | if !buffer.starts_with(HEADER_PREFIX) { 89 | let prefix = String::from_utf8_lossy(&buffer[..HEADER_PREFIX.len()]); 90 | anyhow::bail!("invalid header prefix: {prefix}"); 91 | } 92 | 93 | let page_size_raw = read_be_word_at(buffer, HEADER_PAGE_SIZE_OFFSET); 94 | let page_size = match page_size_raw { 95 | 1 => PAGE_MAX_SIZE, 96 | n if ((n & (n - 1)) == 0) && n != 0 => n as u32, 97 | _ => anyhow::bail!("page size is not a power of 2: {}", page_size_raw), 98 | }; 99 | 100 | Ok(page::Header { page_size }) 101 | } 102 | 103 | fn read_be_word_at(input: &[u8], offset: usize) -> u16 { 104 | u16::from_be_bytes(input[offset..offset + 2].try_into().unwrap()) 105 | } 106 | ``` 107 | 108 | Two things to note here: 109 | 110 | - As the maximum page size cannot be represented as a 2-byte integer, a page size of 1 is used to represent the maximum 111 | page size. 112 | - We use a somewhat convoluted expression to check if the page size is a power of 2. 113 | The expression `n & (n - 1) == 0` is true if and only if `n` is a power of 2, except for `n = 0`. 114 | 115 | ## Decoding Table B-tree leaf pages 116 | 117 | ![](https://cdn.hashnode.com/res/hashnode/image/upload/v1721571943115/f84ad91d-d3a3-462e-8f2b-1b1975badb1a.png align=" 118 | center") 119 | 120 | Now that we have the minimum information we need to read pages from the disk, let's explore 121 | the content of a `table btree-leaf` page. 122 | `table btree-leaf` pages start with an 8-byte header, followed by a sequence of "cell pointers" 123 | containing the offset of every cell in the page. 
The cells contain the table data, and we 124 | can think of them as key-value pairs, where the key is a 64-bits integer encoded as 125 | a [varint](https://carlmastrangelo.com/blog/lets-make-a-varint) 126 | (the `rowid`) and the value is an arbitrary sequence of bytes representing the row data. 127 | The header contains the following fields: 128 | 129 | - `page_type`: byte representing the page type. For `table btree-leaf` pages, this is 0x0D. 130 | - `first_freeblock`: 2-byte integer representing the offset of the first free block in the page, or zero if there is no 131 | freeblock. 132 | - `cell_count`: 2-byte integer representing the number of cells in the page. 133 | - `cell_content_offset`: 2-byte integer representing the offset of the first cell. 134 | - `fragmented_bytes_count`: 1-byte integer representing the number of fragmented free bytes in the page (we won't make 135 | use of it for now). 136 | 137 | We'll start by defining a `Page` enum representing a parsed page, along with 138 | the necessary structs to represent the page header and the cell pointers: 139 | 140 | ```rust 141 | #[derive(Debug, Clone)] 142 | pub enum Page { 143 | TableLeaf(TableLeafPage), 144 | } 145 | 146 | #[derive(Debug, Clone)] 147 | pub struct TableLeafPage { 148 | pub header: PageHeader, 149 | pub cell_pointers: Vec, 150 | pub cells: Vec, 151 | } 152 | 153 | #[derive(Debug, Copy, Clone)] 154 | pub struct PageHeader { 155 | pub page_type: PageType, 156 | pub first_freeblock: u16, 157 | pub cell_count: u16, 158 | pub cell_content_offset: u32, 159 | pub fragmented_bytes_count: u8, 160 | } 161 | 162 | #[derive(Debug, Copy, Clone)] 163 | pub enum PageType { 164 | TableLeaf, 165 | } 166 | 167 | #[derive(Debug, Clone)] 168 | pub struct TableLeafCell { 169 | pub size: i64, 170 | pub row_id: i64, 171 | pub payload: Vec, 172 | } 173 | ``` 174 | 175 | The corresponding parsing functions are quite straightforward. 
Note the offset handling 176 | in `parse_page`: since the first page contains the database header, we start parsing 177 | the page at offset 100. 178 | 179 | ```rust 180 | /// pager.rs 181 | const PAGE_LEAF_HEADER_SIZE: usize = 8; 182 | const PAGE_FIRST_FREEBLOCK_OFFSET: usize = 1; 183 | const PAGE_CELL_COUNT_OFFSET: usize = 3; 184 | const PAGE_CELL_CONTENT_OFFSET: usize = 5; 185 | const PAGE_FRAGMENTED_BYTES_COUNT_OFFSET: usize = 7; 186 | 187 | fn parse_page(buffer: &[u8], page_num: usize) -> anyhow::Result { 188 | let ptr_offset = if page_num == 1 { HEADER_SIZE as u16 } else { 0 }; 189 | 190 | match buffer[0] { 191 | PAGE_LEAF_TABLE_ID => parse_table_leaf_page(buffer, ptr_offset), 192 | _ => Err(anyhow::anyhow!("unknown page type: {}", buffer[0])), 193 | } 194 | } 195 | 196 | fn parse_table_leaf_page(buffer: &[u8], ptr_offset: u16) -> anyhow::Result { 197 | let header = parse_page_header(buffer)?; 198 | 199 | let content_buffer = &buffer[PAGE_LEAF_HEADER_SIZE..]; 200 | let cell_pointers = parse_cell_pointers(content_buffer, header.cell_count as usize, ptr_offset); 201 | 202 | let cells = cell_pointers 203 | .iter() 204 | .map(|&ptr| parse_table_leaf_cell(&buffer[ptr as usize..])) 205 | .collect::>>()?; 206 | 207 | Ok(page::Page::TableLeaf(page::TableLeafPage { 208 | header, 209 | cell_pointers, 210 | cells, 211 | })) 212 | } 213 | 214 | 215 | fn parse_page_header(buffer: &[u8]) -> anyhow::Result { 216 | let page_type = match buffer[0] { 217 | 0x0d => page::PageType::TableLeaf, 218 | _ => anyhow::bail!("unknown page type: {}", buffer[0]), 219 | }; 220 | 221 | let first_freeblock = read_be_word_at(buffer, PAGE_FIRST_FREEBLOCK_OFFSET); 222 | let cell_count = read_be_word_at(buffer, PAGE_CELL_COUNT_OFFSET); 223 | let cell_content_offset = match read_be_word_at(buffer, PAGE_CELL_CONTENT_OFFSET) { 224 | 0 => 65536, 225 | n => n as u32, 226 | }; 227 | let fragmented_bytes_count = buffer[PAGE_FRAGMENTED_BYTES_COUNT_OFFSET]; 228 | 229 | Ok(page::PageHeader { 230 | 
page_type, 231 | first_freeblock, 232 | cell_count, 233 | cell_content_offset, 234 | fragmented_bytes_count, 235 | }) 236 | } 237 | 238 | 239 | fn parse_cell_pointers(buffer: &[u8], n: usize, ptr_offset: u16) -> Vec { 240 | let mut pointers = Vec::with_capacity(n); 241 | for i in 0..n { 242 | pointers.push(read_be_word_at(buffer, 2 * i) - ptr_offset); 243 | } 244 | pointers 245 | } 246 | 247 | fn parse_table_leaf_cell(mut buffer: &[u8]) -> anyhow::Result { 248 | let (n, size) = read_varint_at(buffer, 0); 249 | buffer = &buffer[n as usize..]; 250 | 251 | let (n, row_id) = read_varint_at(buffer, 0); 252 | buffer = &buffer[n as usize..]; 253 | 254 | let payload = buffer[..size as usize].to_vec(); 255 | 256 | Ok(page::TableLeafCell { 257 | size, 258 | row_id, 259 | payload, 260 | }) 261 | } 262 | 263 | pub fn read_varint_at(buffer: &[u8], mut offset: usize) -> (u8, i64) { 264 | let mut size = 0; 265 | let mut result = 0; 266 | 267 | while size < 9 { 268 | let current_byte = buffer[offset] as i64; 269 | if size == 8 { 270 | result = (result << 8) | current_byte; 271 | } else { 272 | result = (result << 7) | (current_byte & 0b0111_1111); 273 | } 274 | 275 | offset += 1; 276 | size += 1; 277 | 278 | if current_byte & 0b1000_0000 == 0 { 279 | break; 280 | } 281 | } 282 | 283 | (size, result) 284 | } 285 | ``` 286 | 287 | To read a varint, we copy the 7 least significant bits of each byte to the result, as long as the most significant bit is set. As the maximum length of a varint is 9 bytes, keep track of 288 | the number of bytes visited and stop after a maximum of 9 bytes. Note that to 289 | complete a 64 bits value, we need the first 7 bits of the first 8 bytes 290 | and all the bits of the last byte. That's why we test the current size 291 | of the varint at each iteration and add a special case for the last byte (when `size == 8`). 292 | 293 | 294 | We can finally implement the pager itself. 
For now, it only loads and caches pages without 295 | any eviction policy: 296 | 297 | ```rust 298 | // pager.rs 299 | #[derive(Debug, Clone)] 300 | pub struct Pager { 301 | input: I, 302 | page_size: usize, 303 | pages: HashMap, 304 | } 305 | 306 | impl Pager { 307 | pub fn new(input: I, page_size: usize) -> Self { 308 | Self { 309 | input, 310 | page_size, 311 | pages: HashMap::new(), 312 | } 313 | } 314 | 315 | pub fn read_page(&mut self, n: usize) -> anyhow::Result<&page::Page> { 316 | if self.pages.contains_key(&n) { 317 | return Ok(self.pages.get(&n).unwrap()); 318 | } 319 | 320 | let page = self.load_page(n)?; 321 | self.pages.insert(n, page); 322 | Ok(self.pages.get(&n).unwrap()) 323 | } 324 | 325 | fn load_page(&mut self, n: usize) -> anyhow::Result { 326 | let offset = n.saturating_sub(1) * self.page_size; 327 | 328 | self.input 329 | .seek(SeekFrom::Start(offset as u64)) 330 | .context("seek to page start")?; 331 | 332 | let mut buffer = vec![0; self.page_size]; 333 | self.input.read_exact(&mut buffer).context("read page")?; 334 | 335 | parse_page(&buffer, n) 336 | } 337 | } 338 | ``` 339 | 340 | ## Records 341 | 342 | We now have a way to read pages, and to access the pages cells. But how to decode the values of the cells? 343 | Each cell contains the value of a row in the table, encoded using 344 | the [SQLite record format](https://www.sqlite.org/fileformat2.html#record_format). 345 | The record format is quite simple: a record consists of a header, followed by a sequence of field values. 
346 | The header starts with a varint representing the byte size of the header, followed by a sequence 347 | of varints -one per column- determining the type of each column according to the following table: 348 | 349 | - 0: NULL 350 | - 1: 8-bits signed integer 351 | - 2: 16-bits signed integer 352 | - 3: 24-bits signed integer 353 | - 4: 32-bits signed integer 354 | - 5: 48-bits signed integer 355 | - 6: 64-bits signed integer 356 | - 7: 64-bits IEEE floating point number 357 | - 8: value is the integer 0 358 | - 9: value is the integer 1 359 | - 10 & 11: reserved for internal use 360 | - n with n even and n > 12: BLOB of size (n - 12) / 2 361 | - n with n odd and n > 13: text of size (n - 13) / 2 362 | 363 | We now have all the information we need to parse and represent record's headers: 364 | 365 | ```rust 366 | // src/cursor.rs 367 | #[derive(Debug, Copy, Clone)] 368 | pub enum RecordFieldType { 369 | Null, 370 | I8, 371 | I16, 372 | I24, 373 | I32, 374 | I48, 375 | I64, 376 | Float, 377 | Zero, 378 | One, 379 | String, 380 | Blob, 381 | } 382 | 383 | #[derive(Debug, Clone)] 384 | pub struct RecordField { 385 | pub offset: usize, 386 | pub field_type: RecordFieldType, 387 | } 388 | 389 | #[derive(Debug, Clone)] 390 | pub struct RecordHeader { 391 | pub fields: Vec, 392 | } 393 | 394 | fn parse_record_header(mut buffer: &[u8]) -> anyhow::Result { 395 | let (varint_size, header_length) = crate::pager::read_varint_at(buffer, 0); 396 | buffer = &buffer[varint_size as usize..header_length as usize]; 397 | 398 | let mut fields = Vec::new(); 399 | let mut current_offset = header_length as usize; 400 | 401 | while !buffer.is_empty() { 402 | let (discriminant_size, discriminant) = crate::pager::read_varint_at(buffer, 0); 403 | buffer = &buffer[discriminant_size as usize..]; 404 | 405 | let (field_type, field_size) = match discriminant { 406 | 0 => (RecordFieldType::Null, 0), 407 | 1 => (RecordFieldType::I8, 1), 408 | 2 => (RecordFieldType::I16, 2), 409 | 3 => 
(RecordFieldType::I24, 3), 410 | 4 => (RecordFieldType::I32, 4), 411 | 5 => (RecordFieldType::I48, 6), 412 | 6 => (RecordFieldType::I64, 8), 413 | 7 => (RecordFieldType::Float, 8), 414 | 8 => (RecordFieldType::Zero, 0), 415 | 9 => (RecordFieldType::One, 0), 416 | n if n >= 12 && n % 2 == 0 => { 417 | let size = ((n - 12) / 2) as usize; 418 | (RecordFieldType::Blob(size), size) 419 | } 420 | n if n >= 13 && n % 2 == 1 => { 421 | let size = ((n - 13) / 2) as usize; 422 | (RecordFieldType::String(size), size) 423 | } 424 | n => anyhow::bail!("unsupported field type: {}", n), 425 | }; 426 | 427 | fields.push(RecordField { 428 | offset: current_offset, 429 | field_type, 430 | }); 431 | 432 | current_offset += field_size; 433 | } 434 | 435 | Ok(RecordHeader { fields }) 436 | } 437 | ``` 438 | 439 | To make it easier to work with records, we'll define a `Value` type, representing field values 440 | and a `Cursor` struct that uniquely identifies a record within a database file. The `Cursor` 441 | will expose a `field` method, returning the value of the record's n-th field: 442 | 443 | ```rust 444 | // src/value.rs 445 | use std::borrow::Cow; 446 | 447 | #[derive(Debug, Clone)] 448 | pub enum Value<'p> { 449 | Null, 450 | String(Cow<'p, str>), 451 | Blob(Cow<'p, [u8]>), 452 | Int(i64), 453 | Float(f64), 454 | } 455 | 456 | impl<'p> Value<'p> { 457 | pub fn as_str(&self) -> Option<&str> { 458 | if let Value::String(s) = self { 459 | Some(s.as_ref()) 460 | } else { 461 | None 462 | } 463 | } 464 | } 465 | 466 | ``` 467 | 468 | ```rust 469 | // src/cursor.rs 470 | #[derive(Debug)] 471 | pub struct Cursor<'p> { 472 | header: RecordHeader, 473 | pager: &'p mut Pager, 474 | page_index: usize, 475 | page_cell: usize, 476 | } 477 | 478 | impl<'p> Cursor<'p> { 479 | pub fn field(&mut self, n: usize) -> Option { 480 | let record_field = self.header.fields.get(n)?; 481 | 482 | let payload = match self.pager.read_page(self.page_index) { 483 | Ok(Page::TableLeaf(leaf)) => 
&leaf.cells[self.page_cell].payload, 484 | _ => return None, 485 | }; 486 | 487 | match record_field.field_type { 488 | RecordFieldType::Null => Some(Value::Null), 489 | RecordFieldType::I8 => Some(Value::Int(read_i8_at(payload, record_field.offset))), 490 | RecordFieldType::I16 => Some(Value::Int(read_i16_at(payload, record_field.offset))), 491 | RecordFieldType::I24 => Some(Value::Int(read_i24_at(payload, record_field.offset))), 492 | RecordFieldType::I32 => Some(Value::Int(read_i32_at(payload, record_field.offset))), 493 | RecordFieldType::I48 => Some(Value::Int(read_i48_at(payload, record_field.offset))), 494 | RecordFieldType::I64 => Some(Value::Int(read_i64_at(payload, record_field.offset))), 495 | RecordFieldType::Float => Some(Value::Float(read_f64_at(payload, record_field.offset))), 496 | RecordFieldType::String(length) => { 497 | let value = std::str::from_utf8( 498 | &payload[record_field.offset..record_field.offset + length], 499 | ).expect("invalid utf8"); 500 | Some(Value::String(Cow::Borrowed(value))) 501 | } 502 | RecordFieldType::Blob(length) => { 503 | let value = &payload[record_field.offset..record_field.offset + length]; 504 | Some(Value::Blob(Cow::Borrowed(value))) 505 | } 506 | _ => panic!("unimplemented"), 507 | } 508 | } 509 | } 510 | 511 | fn read_i8_at(input: &[u8], offset: usize) -> i64 { 512 | input[offset] as i64 513 | } 514 | 515 | fn read_i16_at(input: &[u8], offset: usize) -> i64 { 516 | i16::from_be_bytes(input[offset..offset + 2].try_into().unwrap()) as i64 517 | } 518 | 519 | fn read_i24_at(input: &[u8], offset: usize) -> i64 { 520 | (i32::from_be_bytes(input[offset..offset + 3].try_into().unwrap()) & 0x00FFFFFF) as i64 521 | } 522 | 523 | fn read_i32_at(input: &[u8], offset: usize) -> i64 { 524 | i32::from_be_bytes(input[offset..offset + 4].try_into().unwrap()) as i64 525 | } 526 | 527 | fn read_i48_at(input: &[u8], offset: usize) -> i64 { 528 | i64::from_be_bytes(input[offset..offset + 6].try_into().unwrap()) & 
0x0000FFFFFFFFFFFF 529 | } 530 | 531 | fn read_i64_at(input: &[u8], offset: usize) -> i64 { 532 | i64::from_be_bytes(input[offset..offset + 8].try_into().unwrap()) 533 | } 534 | 535 | fn read_f64_at(input: &[u8], offset: usize) -> f64 { 536 | f64::from_be_bytes(input[offset..offset + 8].try_into().unwrap()) 537 | } 538 | ``` 539 | 540 | To simplify iteration over a page's records, we'll also implement a `Scanner` struct that 541 | wraps a page and allows us to get a `Cursor` for each record: 542 | 543 | ```rust 544 | // src/cursor.rs 545 | #[derive(Debug)] 546 | pub struct Scanner<'p> { 547 | pager: &'p mut Pager, 548 | page: usize, 549 | cell: usize, 550 | } 551 | 552 | impl<'p> Scanner<'p> { 553 | pub fn new(pager: &'p mut Pager, page: usize) -> Scanner<'p> { 554 | Scanner { 555 | pager, 556 | page, 557 | cell: 0, 558 | } 559 | } 560 | pub fn next_record(&mut self) -> Option> { 561 | let page = match self.pager.read_page(self.page) { 562 | Ok(page) => page, 563 | Err(e) => return Some(Err(e)), 564 | }; 565 | 566 | match page { 567 | Page::TableLeaf(leaf) => { 568 | let cell = leaf.cells.get(self.cell)?; 569 | 570 | let header = match parse_record_header(&cell.payload) { 571 | Ok(header) => header, 572 | Err(e) => return Some(Err(e)), 573 | }; 574 | 575 | let record = Cursor { 576 | header, 577 | pager: self.pager, 578 | page_index: self.page, 579 | page_cell: self.cell, 580 | }; 581 | 582 | self.cell += 1; 583 | 584 | Some(Ok(record)) 585 | } 586 | } 587 | } 588 | } 589 | ``` 590 | 591 | ## Table descriptions 592 | 593 | With most of the leg work out of the way, we can get back to our original goal: listing tables. 594 | SQLite stores the schema of a database in a special table called `sqlite_master`. 
595 | The schema for the `sqlite_master` table is as follows: 596 | 597 | ```sql 598 | CREATE TABLE sqlite_schema( 599 | type text, 600 | name text, 601 | tbl_name text, 602 | rootpage integer, 603 | sql text 604 | ); 605 | ``` 606 | 607 | These columns are used as follows: 608 | 609 | - `type`: the type of the schema object. For tables, this will always be `table`. 610 | - `name`: the name of the schema object. 611 | - `tbl_name`: the name of the table the schema object is associated with. In the case of tables, this will be the same 612 | as `name`. 613 | - `rootpage`: root page of the table, we'll use it later to read the table's content. 614 | - `sql`: the SQL statement used to create the table. 615 | 616 | Since our simple database only handles basic schemas for now, we can assume that the entire 617 | schema fits in the first page of our database file. 618 | In order to list the tables in the database, we'll need to: 619 | 620 | - initialize the pager with the database file 621 | - create a `Scanner` for the first page 622 | - iterate over the records, and print the value of the `name` field (at index 1) for each record. 
623 | 624 | First, we'll define a `Db` struct to hold our global state: 625 | 626 | ```rust 627 | // src/db.rs 628 | use std::{io::Read, path::Path}; 629 | 630 | use anyhow::Context; 631 | 632 | use crate::{cursor::Scanner, page::DbHeader, pager, pager::Pager}; 633 | 634 | pub struct Db { 635 | pub header: DbHeader, 636 | pager: Pager, 637 | } 638 | 639 | impl Db { 640 | pub fn from_file(filename: impl AsRef) -> anyhow::Result { 641 | let mut file = std::fs::File::open(filename.as_ref()).context("open db file")?; 642 | 643 | let mut header_buffer = [0; pager::HEADER_SIZE]; 644 | file.read_exact(&mut header_buffer) 645 | .context("read db header")?; 646 | 647 | let header = pager::parse_header(&header_buffer).context("parse db header")?; 648 | 649 | let pager = Pager::new(file, header.page_size as usize); 650 | 651 | Ok(Db { header, pager }) 652 | } 653 | 654 | pub fn scanner(&mut self, page: usize) -> Scanner { 655 | Scanner::new(&mut self.pager, page) 656 | } 657 | } 658 | ``` 659 | 660 | The implementation of a basic REPL supporting the `tables` and `tables` commands is straightforward: 661 | 662 | ```rust 663 | use std::io::{stdin, BufRead, Write}; 664 | 665 | use anyhow::Context; 666 | 667 | mod cursor; 668 | mod db; 669 | mod page; 670 | mod pager; 671 | mod value; 672 | 673 | fn main() -> anyhow::Result<()> { 674 | let database = db::Db::from_file(std::env::args().nth(1).context("missing db file")?)?; 675 | cli(database) 676 | } 677 | 678 | fn cli(mut db: db::Db) -> anyhow::Result<()> { 679 | print_flushed("rqlite> ")?; 680 | 681 | let mut line_buffer = String::new(); 682 | 683 | while stdin().lock().read_line(&mut line_buffer).is_ok() { 684 | match line_buffer.trim() { 685 | ".exit" => break, 686 | ".tables" => display_tables(&mut db)?, 687 | _ => { 688 | println!("Unrecognized command '{}'", line_buffer.trim()); 689 | } 690 | } 691 | 692 | print_flushed("\nrqlite> ")?; 693 | 694 | line_buffer.clear(); 695 | } 696 | 697 | Ok(()) 698 | } 699 | 700 | fn 
display_tables(db: &mut db::Db) -> anyhow::Result<()> { 701 | let mut scanner = db.scanner(1); 702 | 703 | while let Some(Ok(mut record)) = scanner.next_record() { 704 | let type_value = record 705 | .field(0) 706 | .context("missing type field") 707 | .context("invalid type field")?; 708 | 709 | if type_value.as_str() == Some("table") { 710 | let name_value = record 711 | .field(1) 712 | .context("missing name field") 713 | .context("invalid name field")?; 714 | 715 | print!("{} ", name_value.as_str().unwrap()); 716 | } 717 | } 718 | 719 | Ok(()) 720 | } 721 | 722 | fn print_flushed(s: &str) -> anyhow::Result<()> { 723 | print!("{}", s); 724 | std::io::stdout().flush().context("flush stdout") 725 | } 726 | ``` 727 | 728 | ## Conclusion 729 | 730 | The first part of our SQLite-compatible database is now complete. We can read the database header, 731 | parse table btree-leaf pages and decode records, but we still have a long way to go before we can 732 | support rich queries. In the next part, we'll learn how to parse the SQL language and make 733 | our first strides towards implementing the `SELECT` statement! 734 | --------------------------------------------------------------------------------