├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md └── src ├── btree.rs ├── btree ├── branch.rs ├── leaf.rs └── node.rs ├── buffer.rs ├── disk.rs ├── executor.rs ├── latch.rs ├── main.rs ├── query.rs └── slotted.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | *.qp 3 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | [[package]] 4 | name = "anyhow" 5 | version = "1.0.35" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | checksum = "2c0df63cb2955042487fad3aefd2c6e3ae7389ac5dc1beb28921de0b69f779d4" 8 | 9 | [[package]] 10 | name = "byteorder" 11 | version = "1.3.4" 12 | source = "registry+https://github.com/rust-lang/crates.io-index" 13 | checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" 14 | 15 | [[package]] 16 | name = "cfg-if" 17 | version = "0.1.10" 18 | source = "registry+https://github.com/rust-lang/crates.io-index" 19 | checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 20 | 21 | [[package]] 22 | name = "cfg-if" 23 | version = "1.0.0" 24 | source = "registry+https://github.com/rust-lang/crates.io-index" 25 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 26 | 27 | [[package]] 28 | name = "getrandom" 29 | version = "0.1.16" 30 | source = "registry+https://github.com/rust-lang/crates.io-index" 31 | checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" 32 | dependencies = [ 33 | "cfg-if 1.0.0", 34 | "libc", 35 | "wasi", 36 | ] 37 | 38 | [[package]] 39 | name = "hex" 40 | version = "0.4.2" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = 
"644f9158b2f133fd50f5fb3242878846d9eb792e445c893805ff0e3824006e35" 43 | dependencies = [ 44 | "serde", 45 | ] 46 | 47 | [[package]] 48 | name = "instant" 49 | version = "0.1.9" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec" 52 | dependencies = [ 53 | "cfg-if 1.0.0", 54 | ] 55 | 56 | [[package]] 57 | name = "itoa" 58 | version = "0.4.6" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6" 61 | 62 | [[package]] 63 | name = "libc" 64 | version = "0.2.81" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" 67 | 68 | [[package]] 69 | name = "lock_api" 70 | version = "0.4.2" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312" 73 | dependencies = [ 74 | "scopeguard", 75 | ] 76 | 77 | [[package]] 78 | name = "parking_lot" 79 | version = "0.11.1" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb" 82 | dependencies = [ 83 | "instant", 84 | "lock_api", 85 | "parking_lot_core", 86 | ] 87 | 88 | [[package]] 89 | name = "parking_lot_core" 90 | version = "0.8.1" 91 | source = "registry+https://github.com/rust-lang/crates.io-index" 92 | checksum = "d7c6d9b8427445284a09c55be860a15855ab580a417ccad9da88f5a06787ced0" 93 | dependencies = [ 94 | "cfg-if 1.0.0", 95 | "instant", 96 | "libc", 97 | "redox_syscall", 98 | "smallvec", 99 | "winapi", 100 | ] 101 | 102 | [[package]] 103 | name = "ppv-lite86" 104 | version = "0.2.10" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" 107 | 108 
| [[package]] 109 | name = "pretty-hex" 110 | version = "0.2.1" 111 | source = "registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "bc5c99d529f0d30937f6f4b8a86d988047327bb88d04d2c4afc356de74722131" 113 | 114 | [[package]] 115 | name = "proc-macro2" 116 | version = "1.0.24" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" 119 | dependencies = [ 120 | "unicode-xid", 121 | ] 122 | 123 | [[package]] 124 | name = "qp" 125 | version = "0.1.0" 126 | dependencies = [ 127 | "anyhow", 128 | "hex", 129 | "parking_lot", 130 | "pretty-hex", 131 | "serde", 132 | "serde_json", 133 | "tempfile", 134 | "thiserror", 135 | "zerocopy", 136 | ] 137 | 138 | [[package]] 139 | name = "quote" 140 | version = "1.0.7" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37" 143 | dependencies = [ 144 | "proc-macro2", 145 | ] 146 | 147 | [[package]] 148 | name = "rand" 149 | version = "0.7.3" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 152 | dependencies = [ 153 | "getrandom", 154 | "libc", 155 | "rand_chacha", 156 | "rand_core", 157 | "rand_hc", 158 | ] 159 | 160 | [[package]] 161 | name = "rand_chacha" 162 | version = "0.2.2" 163 | source = "registry+https://github.com/rust-lang/crates.io-index" 164 | checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" 165 | dependencies = [ 166 | "ppv-lite86", 167 | "rand_core", 168 | ] 169 | 170 | [[package]] 171 | name = "rand_core" 172 | version = "0.5.1" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 175 | dependencies = [ 176 | "getrandom", 177 | ] 178 | 179 | [[package]] 180 | name = 
"rand_hc" 181 | version = "0.2.0" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 184 | dependencies = [ 185 | "rand_core", 186 | ] 187 | 188 | [[package]] 189 | name = "redox_syscall" 190 | version = "0.1.57" 191 | source = "registry+https://github.com/rust-lang/crates.io-index" 192 | checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce" 193 | 194 | [[package]] 195 | name = "remove_dir_all" 196 | version = "0.5.3" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" 199 | dependencies = [ 200 | "winapi", 201 | ] 202 | 203 | [[package]] 204 | name = "ryu" 205 | version = "1.0.5" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" 208 | 209 | [[package]] 210 | name = "scopeguard" 211 | version = "1.1.0" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 214 | 215 | [[package]] 216 | name = "serde" 217 | version = "1.0.118" 218 | source = "registry+https://github.com/rust-lang/crates.io-index" 219 | checksum = "06c64263859d87aa2eb554587e2d23183398d617427327cf2b3d0ed8c69e4800" 220 | dependencies = [ 221 | "serde_derive", 222 | ] 223 | 224 | [[package]] 225 | name = "serde_derive" 226 | version = "1.0.118" 227 | source = "registry+https://github.com/rust-lang/crates.io-index" 228 | checksum = "c84d3526699cd55261af4b941e4e725444df67aa4f9e6a3564f18030d12672df" 229 | dependencies = [ 230 | "proc-macro2", 231 | "quote", 232 | "syn", 233 | ] 234 | 235 | [[package]] 236 | name = "serde_json" 237 | version = "1.0.60" 238 | source = "registry+https://github.com/rust-lang/crates.io-index" 239 | checksum = 
"1500e84d27fe482ed1dc791a56eddc2f230046a040fa908c08bda1d9fb615779" 240 | dependencies = [ 241 | "itoa", 242 | "ryu", 243 | "serde", 244 | ] 245 | 246 | [[package]] 247 | name = "smallvec" 248 | version = "1.5.1" 249 | source = "registry+https://github.com/rust-lang/crates.io-index" 250 | checksum = "ae524f056d7d770e174287294f562e95044c68e88dec909a00d2094805db9d75" 251 | 252 | [[package]] 253 | name = "syn" 254 | version = "1.0.54" 255 | source = "registry+https://github.com/rust-lang/crates.io-index" 256 | checksum = "9a2af957a63d6bd42255c359c93d9bfdb97076bd3b820897ce55ffbfbf107f44" 257 | dependencies = [ 258 | "proc-macro2", 259 | "quote", 260 | "unicode-xid", 261 | ] 262 | 263 | [[package]] 264 | name = "synstructure" 265 | version = "0.12.4" 266 | source = "registry+https://github.com/rust-lang/crates.io-index" 267 | checksum = "b834f2d66f734cb897113e34aaff2f1ab4719ca946f9a7358dba8f8064148701" 268 | dependencies = [ 269 | "proc-macro2", 270 | "quote", 271 | "syn", 272 | "unicode-xid", 273 | ] 274 | 275 | [[package]] 276 | name = "tempfile" 277 | version = "3.1.0" 278 | source = "registry+https://github.com/rust-lang/crates.io-index" 279 | checksum = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9" 280 | dependencies = [ 281 | "cfg-if 0.1.10", 282 | "libc", 283 | "rand", 284 | "redox_syscall", 285 | "remove_dir_all", 286 | "winapi", 287 | ] 288 | 289 | [[package]] 290 | name = "thiserror" 291 | version = "1.0.22" 292 | source = "registry+https://github.com/rust-lang/crates.io-index" 293 | checksum = "0e9ae34b84616eedaaf1e9dd6026dbe00dcafa92aa0c8077cb69df1fcfe5e53e" 294 | dependencies = [ 295 | "thiserror-impl", 296 | ] 297 | 298 | [[package]] 299 | name = "thiserror-impl" 300 | version = "1.0.22" 301 | source = "registry+https://github.com/rust-lang/crates.io-index" 302 | checksum = "9ba20f23e85b10754cd195504aebf6a27e2e6cbe28c17778a0c930724628dd56" 303 | dependencies = [ 304 | "proc-macro2", 305 | "quote", 306 | "syn", 307 | ] 308 | 309 | 
[[package]] 310 | name = "unicode-xid" 311 | version = "0.2.1" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" 314 | 315 | [[package]] 316 | name = "wasi" 317 | version = "0.9.0+wasi-snapshot-preview1" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 320 | 321 | [[package]] 322 | name = "winapi" 323 | version = "0.3.9" 324 | source = "registry+https://github.com/rust-lang/crates.io-index" 325 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 326 | dependencies = [ 327 | "winapi-i686-pc-windows-gnu", 328 | "winapi-x86_64-pc-windows-gnu", 329 | ] 330 | 331 | [[package]] 332 | name = "winapi-i686-pc-windows-gnu" 333 | version = "0.4.0" 334 | source = "registry+https://github.com/rust-lang/crates.io-index" 335 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 336 | 337 | [[package]] 338 | name = "winapi-x86_64-pc-windows-gnu" 339 | version = "0.4.0" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 342 | 343 | [[package]] 344 | name = "zerocopy" 345 | version = "0.3.0" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | checksum = "6580539ad917b7c026220c4b3f2c08d52ce54d6ce0dc491e66002e35388fab46" 348 | dependencies = [ 349 | "byteorder", 350 | "zerocopy-derive", 351 | ] 352 | 353 | [[package]] 354 | name = "zerocopy-derive" 355 | version = "0.2.0" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "d498dbd1fd7beb83c86709ae1c33ca50942889473473d287d56ce4770a18edfb" 358 | dependencies = [ 359 | "proc-macro2", 360 | "syn", 361 | "synstructure", 362 | ] 363 | -------------------------------------------------------------------------------- 
/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "qp" 3 | version = "0.1.0" 4 | authors = ["Hidekazu Kobayashi "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | parking_lot = "0.11" 11 | thiserror = "1.0" 12 | anyhow = "1.0" 13 | serde = { version = "1.0", features = ["derive"] } 14 | serde_json = "1.0" 15 | zerocopy = "0.3" 16 | hex = { version = "0.4", features = ["serde"] } 17 | 18 | [dev-dependencies] 19 | tempfile = "3.1" 20 | pretty-hex = "0.2" 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2020 Hidekazu Kobayashi 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # qp 2 | 3 | A didactic on-disk key-value database management system. 4 | -------------------------------------------------------------------------------- /src/btree.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | convert::TryInto, 3 | ops::{Deref, DerefMut}, 4 | }; 5 | 6 | use parking_lot::RawRwLock; 7 | use thiserror::Error; 8 | 9 | use crate::{buffer::Buffer, latch::OwnedRwLockExt}; 10 | use crate::{ 11 | buffer::{self, BufferPoolManager}, 12 | latch::OwnedRwLockReadGuard, 13 | latch::OwnedRwLockWriteGuard, 14 | }; 15 | 16 | use super::disk::PageId; 17 | 18 | mod branch; 19 | mod leaf; 20 | mod node; 21 | 22 | #[derive(Debug, Error)] 23 | pub enum Error { 24 | #[error(transparent)] 25 | Buffer(#[from] buffer::Error), 26 | #[error("dead lock")] 27 | Deadlock, 28 | } 29 | 30 | struct BTreePage { 31 | data: T, 32 | } 33 | 34 | impl BTreePage 35 | where 36 | T: Deref, 37 | { 38 | fn root_page_id(&self) -> PageId { 39 | let bytes = self.data[0..8].try_into().unwrap(); 40 | PageId(u64::from_be_bytes(bytes)) 41 | } 42 | } 43 | 44 | impl BTreePage 45 | where 46 | T: DerefMut, 47 | { 48 | fn set_root_page_id(&mut self, PageId(prev_page_id): PageId) { 49 | self.data[0..8].copy_from_slice(&prev_page_id.to_be_bytes()); 50 | } 51 | } 52 | 53 | pub type Key = [u8; 8]; 54 | 55 | pub struct Access<'a> { 56 | bufmgr: &'a BufferPoolManager, 57 | pub btree_page_id: PageId, 58 | } 59 | 60 | impl<'a> Access<'a> { 61 | pub fn create(bufmgr: &'a BufferPoolManager) -> Result { 62 | let (btree_page_id, meta_buffer) = bufmgr.create_page()?; 63 | let mut rw_meta_buffer = meta_buffer.write_owned(); 64 | let mut btree = BTreePage { 65 | data: &mut rw_meta_buffer.page[..], 66 | }; 67 | let (root_page_id, root_buffer) = bufmgr.create_page()?; 68 | let mut 
rw_root_buffer = root_buffer.write_owned(); 69 | let mut root = node::NodePage::new(rw_root_buffer.page.as_mut()).unwrap(); 70 | let mut leaf = root.initialize_as_leaf(); 71 | leaf.initialize(); 72 | btree.set_root_page_id(root_page_id); 73 | Ok(Self { 74 | bufmgr, 75 | btree_page_id, 76 | }) 77 | } 78 | 79 | pub fn open(bufmgr: &'a BufferPoolManager, btree_page_id: PageId) -> Self { 80 | Self { 81 | bufmgr, 82 | btree_page_id, 83 | } 84 | } 85 | 86 | fn get_internal( 87 | &self, 88 | ro_node_buffer: OwnedRwLockReadGuard, 89 | key: Key, 90 | buf: &mut Vec, 91 | ) -> Result { 92 | let node = node::NodePage::new(ro_node_buffer.page.as_ref()).unwrap(); 93 | match node.node() { 94 | node::Node::Leaf(leaf) => Ok(leaf.get(key).map(|value| buf.extend(value)).is_some()), 95 | node::Node::Branch(branch) => { 96 | let index = branch.find(key); 97 | let child_page_id = branch.pair(index).child(); 98 | let child_node_page = self.bufmgr.fetch_page(child_page_id)?.read_owned(); 99 | drop(ro_node_buffer); 100 | self.get_internal(child_node_page, key, buf) 101 | } 102 | } 103 | } 104 | 105 | pub fn get(&self, key: Key, buf: &mut Vec) -> Result { 106 | let ro_meta_buffer = self.bufmgr.fetch_page(self.btree_page_id)?.read_owned(); 107 | let btree = BTreePage { 108 | data: &ro_meta_buffer.page[..], 109 | }; 110 | let root_page_id = btree.root_page_id(); 111 | let ro_root_buffer = self.bufmgr.fetch_page(root_page_id)?.read_owned(); 112 | drop(ro_meta_buffer); 113 | self.get_internal(ro_root_buffer, key, buf) 114 | } 115 | 116 | fn iter_internal( 117 | &self, 118 | ro_node_buffer: OwnedRwLockReadGuard, 119 | key: Option, 120 | ) -> Result, Error> { 121 | let node = node::NodePage::new(ro_node_buffer.page.as_ref()).unwrap(); 122 | match node.node() { 123 | node::Node::Leaf(leaf) => { 124 | let start = key 125 | .map(|key| leaf.find(key).unwrap_or_else(|index| index)) 126 | .unwrap_or(0); 127 | Ok(Iter { 128 | bufmgr: &self.bufmgr, 129 | index: start, 130 | buffer: Some(ro_node_buffer), 
131 | }) 132 | } 133 | node::Node::Branch(branch) => { 134 | let index = key.map(|key| branch.find(key)).unwrap_or(0); 135 | let child_page_id = branch.pair(index).child(); 136 | let child_node_page = self.bufmgr.fetch_page(child_page_id)?.read_owned(); 137 | drop(ro_node_buffer); 138 | self.iter_internal(child_node_page, key) 139 | } 140 | } 141 | } 142 | 143 | pub fn iter(&self, key: Option) -> Result, Error> { 144 | let btree_page = self.bufmgr.fetch_page(self.btree_page_id)?.read_owned(); 145 | let btree = BTreePage { 146 | data: &btree_page.page[..], 147 | }; 148 | let root_page_id = btree.root_page_id(); 149 | let root_page = self.bufmgr.fetch_page(root_page_id)?.read_owned(); 150 | drop(btree_page); 151 | self.iter_internal(root_page, key) 152 | } 153 | 154 | fn iter_rev_internal( 155 | &self, 156 | ro_node_buffer: OwnedRwLockReadGuard, 157 | key: Option, 158 | ) -> Result, Error> { 159 | let node = node::NodePage::new(ro_node_buffer.page.as_ref()).unwrap(); 160 | match node.node() { 161 | node::Node::Leaf(leaf) => { 162 | let start = key 163 | .map(|key| { 164 | leaf.find(key) 165 | .map(|index| index as isize) 166 | .unwrap_or_else(|index| index as isize - 1) 167 | }) 168 | .unwrap_or_else(|| leaf.num_records() as isize - 1); 169 | Ok(IterRev { 170 | bufmgr: &self.bufmgr, 171 | index: start, 172 | buffer: Some(ro_node_buffer), 173 | }) 174 | } 175 | node::Node::Branch(branch) => { 176 | let index = key 177 | .map(|key| branch.find(key)) 178 | .unwrap_or_else(|| branch.num_pairs() - 1); 179 | let child_page_id = branch.pair(index).child(); 180 | let child_node_page = self.bufmgr.fetch_page(child_page_id)?.read_owned(); 181 | drop(ro_node_buffer); 182 | self.iter_rev_internal(child_node_page, key) 183 | } 184 | } 185 | } 186 | 187 | pub fn iter_rev(&self, key: Option) -> Result, Error> { 188 | let ro_meta_buffer = self.bufmgr.fetch_page(self.btree_page_id)?.read_owned(); 189 | let btree = BTreePage { 190 | data: &ro_meta_buffer.page[..], 191 | }; 192 | let 
root_page_id = btree.root_page_id(); 193 | let root_page = self.bufmgr.fetch_page(root_page_id)?.read_owned(); 194 | drop(ro_meta_buffer); 195 | self.iter_rev_internal(root_page, key) 196 | } 197 | /** Inserts `key`/`value` into the subtree rooted at the write-latched node `rw_node_buffer` (whose page id is `node_page_id`). Returns `Ok(None)` when no new sibling was created, or `Ok(Some((key, page_id)))` carrying the separator key and page id of a newly created sibling that the caller must link into the parent. Returns `Error::Deadlock` when `try_write_owned` on the next leaf yields nothing. */ 198 | fn put_internal( 199 | &self, 200 | node_page_id: PageId, 201 | mut rw_node_buffer: OwnedRwLockWriteGuard, 202 | key: Key, 203 | value: &[u8], 204 | ) -> Result, Error> { 205 | let mut node = node::NodePage::new(rw_node_buffer.page.as_mut()).unwrap(); 206 | match node.node_mut() { 207 | node::Node::Leaf(mut leaf) => { 208 | if leaf.put(key, value) { 209 | rw_node_buffer.is_dirty = true; 210 | Ok(None) 211 | } else { 212 | let next_leaf_page_id = leaf.next_page_id(); 213 | let next_leaf_page = next_leaf_page_id 214 | .map(|next_leaf_page_id| { 215 | self.bufmgr 216 | .fetch_page(next_leaf_page_id)? 217 | .try_write_owned() 218 | .map(Ok) 219 | .unwrap_or(Err(Error::Deadlock)) 220 | }) 221 | .transpose()?; 222 | 223 | let (new_leaf_page_id, new_leaf_page) = self.bufmgr.create_page()?; 224 | 225 | if let Some(mut rw_next_leaf_buffer) = next_leaf_page { 226 | let mut node_page = 227 | node::NodePage::new(rw_next_leaf_buffer.page.as_mut()).unwrap(); 228 | let mut next_leaf = node_page.node_mut().try_into_leaf().ok().unwrap(); 229 | next_leaf.set_prev_page_id(Some(new_leaf_page_id)); rw_next_leaf_buffer.is_dirty = true; /* the fetched sibling's back-link was rewritten, so mark it dirty like every other mutated fetched page in this file */ 230 | } 231 | leaf.set_next_page_id(Some(new_leaf_page_id)); 232 | 233 | let mut rw_new_leaf_buffer = new_leaf_page.write_owned(); 234 | let mut new_leaf_node_page = 235 | node::NodePage::new(rw_new_leaf_buffer.page.as_mut()).unwrap(); 236 | let mut new_leaf = new_leaf_node_page.initialize_as_leaf(); 237 | new_leaf.initialize(); 238 | let new_leaf_first_key = leaf.split_put(&mut new_leaf, key, value); 239 | new_leaf.set_prev_page_id(Some(node_page_id)); 240 | new_leaf.set_next_page_id(next_leaf_page_id); 241 | rw_node_buffer.is_dirty = true; 242 | Ok(Some((new_leaf_first_key, new_leaf_page_id))) 243 | } 244 | } 245 | node::Node::Branch(mut branch) => { 246 | let index = branch.find(key); 247
| let child_page_id = branch.pair(index).child(); 248 | let child_node_page = self.bufmgr.fetch_page(child_page_id)?.write_owned(); 249 | if let Some((key, child)) = 250 | self.put_internal(child_page_id, child_node_page, key, value)? 251 | { 252 | branch.insert(index + 1, key, child); 253 | if branch.max_pairs() <= branch.num_pairs() { 254 | let (new_branch_page_id, new_branch_page) = self.bufmgr.create_page()?; 255 | let mut rw_new_branch_buffer = new_branch_page.write_owned(); 256 | let mut new_branch_node_page = 257 | node::NodePage::new(rw_new_branch_buffer.page.as_mut()).unwrap(); 258 | let mut new_branch = new_branch_node_page.initialize_as_branch(); 259 | let overflow_key = branch.split(&mut new_branch); 260 | rw_node_buffer.is_dirty = true; 261 | Ok(Some((overflow_key, new_branch_page_id))) 262 | } else { 263 | rw_node_buffer.is_dirty = true; 264 | Ok(None) 265 | } 266 | } else { 267 | Ok(None) 268 | } 269 | } 270 | } 271 | } 272 | 273 | pub fn put(&self, key: Key, value: &[u8]) -> Result<(), Error> { 274 | let mut rw_meta_buffer = self.bufmgr.fetch_page(self.btree_page_id)?.write_owned(); 275 | let mut btree = BTreePage { 276 | data: &mut rw_meta_buffer.page[..], 277 | }; 278 | let root_page_id = btree.root_page_id(); 279 | let root_page = self.bufmgr.fetch_page(root_page_id)?.write_owned(); 280 | if let Some((key, child)) = self.put_internal(root_page_id, root_page, key, value)? 
{ 281 | let (new_root_page_id, new_root_page) = self.bufmgr.create_page()?; 282 | let mut new_root_page = new_root_page.write_owned(); 283 | let mut node_page = node::NodePage::new(new_root_page.page.as_mut()).unwrap(); 284 | let mut branch = node_page.initialize_as_branch(); 285 | branch.initialize(key, root_page_id, child); 286 | btree.set_root_page_id(new_root_page_id); 287 | rw_meta_buffer.is_dirty = true; 288 | } 289 | Ok(()) 290 | } 291 | } 292 | 293 | pub struct Iter<'a> { 294 | bufmgr: &'a BufferPoolManager, 295 | buffer: Option>, 296 | index: usize, 297 | } 298 | impl<'a> Iter<'a> { 299 | pub fn next(&mut self, buf: &mut Vec) -> Result, Error> { 300 | if let Some(ro_buffer) = &self.buffer { 301 | let node_page = node::NodePage::new(ro_buffer.page.as_ref()).unwrap(); 302 | let leaf = node_page.node().try_into_leaf().ok().unwrap(); 303 | if self.index < leaf.num_records() { 304 | let record = leaf.record(self.index); 305 | self.index += 1; 306 | buf.extend(record.value); 307 | Ok(Some(record.key())) 308 | } else { 309 | self.buffer = match leaf.next_page_id() { 310 | Some(next_page_id) => Some(self.bufmgr.fetch_page(next_page_id)?.read_owned()), 311 | None => None, 312 | }; 313 | self.index = 0; 314 | self.next(buf) 315 | } 316 | } else { 317 | Ok(None) 318 | } 319 | } 320 | } 321 | 322 | pub struct IterRev<'a> { 323 | bufmgr: &'a BufferPoolManager, 324 | buffer: Option>, 325 | index: isize, 326 | } 327 | impl<'a> IterRev<'a> { 328 | pub fn next(&mut self, buf: &mut Vec) -> Result, Error> { 329 | if let Some(ro_buffer) = &self.buffer { 330 | let node_page = node::NodePage::new(ro_buffer.page.as_ref()).unwrap(); 331 | let leaf = node_page.node().try_into_leaf().ok().unwrap(); 332 | if self.index >= 0 { 333 | let record = leaf.record(self.index as usize); 334 | self.index -= 1; 335 | buf.extend(record.value); 336 | Ok(Some(record.key())) 337 | } else { 338 | self.buffer = match leaf.prev_page_id() { 339 | Some(prev_page_id) => { 340 | let ro_prev_buffer = 
self.bufmgr.fetch_page(prev_page_id)?.read_owned(); 341 | let prev_node_page = node::NodePage::new(ro_prev_buffer.page.as_ref()).unwrap(); 342 | let leaf = prev_node_page.node().try_into_leaf().ok().unwrap(); 343 | self.index = leaf.num_records() as isize - 1; 344 | Some(ro_prev_buffer) 345 | } 346 | None => None, 347 | }; 348 | self.next(buf) 349 | } 350 | } else { 351 | Ok(None) 352 | } 353 | } 354 | } 355 | 356 | #[cfg(test)] 357 | mod tests { 358 | use tempfile::tempfile; 359 | 360 | use crate::{buffer::BufferPool, disk::DiskManager}; 361 | 362 | use super::*; 363 | #[test] 364 | fn test() { 365 | let disk = DiskManager::new(tempfile().unwrap()).unwrap(); 366 | let pool = BufferPool::new(10); 367 | let bufmgr = BufferPoolManager::new(disk, pool); 368 | let btree_access = Access::create(&bufmgr).unwrap(); 369 | btree_access.put(6u64.to_be_bytes(), b"world").unwrap(); 370 | btree_access.put(3u64.to_be_bytes(), b"hello").unwrap(); 371 | btree_access.put(8u64.to_be_bytes(), b"!").unwrap(); 372 | btree_access.put(4u64.to_be_bytes(), b",").unwrap(); 373 | 374 | let mut buf = vec![]; 375 | assert!(btree_access.get(3u64.to_be_bytes(), &mut buf).unwrap()); 376 | assert_eq!(b"hello", &*buf); 377 | buf.clear(); 378 | assert!(btree_access.get(8u64.to_be_bytes(), &mut buf).unwrap()); 379 | assert_eq!(b"!", &*buf); 380 | buf.clear(); 381 | } 382 | 383 | #[test] 384 | fn test_split() { 385 | let disk = DiskManager::new(tempfile().unwrap()).unwrap(); 386 | let pool = BufferPool::new(10); 387 | let bufmgr = BufferPoolManager::new(disk, pool); 388 | let btree_access = Access::create(&bufmgr).unwrap(); 389 | let long_padding = vec![0xDEu8; 1500]; 390 | btree_access.put(6u64.to_be_bytes(), &long_padding).unwrap(); 391 | btree_access.put(3u64.to_be_bytes(), &long_padding).unwrap(); 392 | btree_access.put(8u64.to_be_bytes(), &long_padding).unwrap(); 393 | btree_access.put(4u64.to_be_bytes(), &long_padding).unwrap(); 394 | btree_access.put(5u64.to_be_bytes(), b"hello").unwrap(); 395 
| 396 | let mut buf = vec![]; 397 | assert!(btree_access.get(5u64.to_be_bytes(), &mut buf).unwrap()); 398 | assert_eq!(b"hello", &*buf); 399 | buf.clear(); 400 | } 401 | 402 | #[test] 403 | fn test_iter() { 404 | let disk = DiskManager::new(tempfile().unwrap()).unwrap(); 405 | let pool = BufferPool::new(10); 406 | let bufmgr = BufferPoolManager::new(disk, pool); 407 | let btree_access = Access::create(&bufmgr).unwrap(); 408 | let long_padding = vec![0xDEu8; 1500]; 409 | btree_access.put(6u64.to_be_bytes(), &long_padding).unwrap(); 410 | btree_access.put(3u64.to_be_bytes(), &long_padding).unwrap(); 411 | btree_access.put(8u64.to_be_bytes(), &long_padding).unwrap(); 412 | btree_access.put(4u64.to_be_bytes(), &long_padding).unwrap(); 413 | btree_access.put(5u64.to_be_bytes(), b"hello").unwrap(); 414 | 415 | let mut iter = btree_access.iter(Some(4u64.to_be_bytes())).unwrap(); 416 | let mut buf = vec![]; 417 | assert_eq!(Some(4u64.to_be_bytes()), iter.next(&mut buf).unwrap()); 418 | assert_eq!(&long_padding, &buf); 419 | buf.clear(); 420 | assert_eq!(Some(5u64.to_be_bytes()), iter.next(&mut buf).unwrap()); 421 | assert_eq!(b"hello", &*buf); 422 | buf.clear(); 423 | assert_eq!(Some(6u64.to_be_bytes()), iter.next(&mut buf).unwrap()); 424 | assert_eq!(&long_padding, &buf); 425 | buf.clear(); 426 | assert_eq!(Some(8u64.to_be_bytes()), iter.next(&mut buf).unwrap()); 427 | assert_eq!(&long_padding, &buf); 428 | buf.clear(); 429 | assert_eq!(None, iter.next(&mut buf).unwrap()); 430 | } 431 | 432 | #[test] 433 | fn test_rev_iter() { 434 | let disk = DiskManager::new(tempfile().unwrap()).unwrap(); 435 | let pool = BufferPool::new(10); 436 | let bufmgr = BufferPoolManager::new(disk, pool); 437 | let btree_access = Access::create(&bufmgr).unwrap(); 438 | let long_padding = vec![0xDEu8; 1500]; 439 | btree_access.put(6u64.to_be_bytes(), &long_padding).unwrap(); 440 | btree_access.put(3u64.to_be_bytes(), &long_padding).unwrap(); 441 | btree_access.put(8u64.to_be_bytes(), 
&long_padding).unwrap(); 442 | btree_access.put(4u64.to_be_bytes(), &long_padding).unwrap(); 443 | btree_access.put(5u64.to_be_bytes(), b"hello").unwrap(); 444 | 445 | let mut iter = btree_access.iter_rev(Some(7u64.to_be_bytes())).unwrap(); 446 | let mut buf = vec![]; 447 | assert_eq!(Some(6u64.to_be_bytes()), iter.next(&mut buf).unwrap()); 448 | assert_eq!(&long_padding, &buf); 449 | buf.clear(); 450 | assert_eq!(Some(5u64.to_be_bytes()), iter.next(&mut buf).unwrap()); 451 | assert_eq!(b"hello", &*buf); 452 | buf.clear(); 453 | assert_eq!(Some(4u64.to_be_bytes()), iter.next(&mut buf).unwrap()); 454 | assert_eq!(&long_padding, &buf); 455 | buf.clear(); 456 | assert_eq!(Some(3u64.to_be_bytes()), iter.next(&mut buf).unwrap()); 457 | assert_eq!(&long_padding, &buf); 458 | buf.clear(); 459 | assert_eq!(None, iter.next(&mut buf).unwrap()); 460 | } 461 | } 462 | -------------------------------------------------------------------------------- /src/btree/branch.rs: -------------------------------------------------------------------------------- 1 | use std::{ops::Range, convert::TryInto, mem::size_of, ops::{Deref, DerefMut}}; 2 | 3 | use zerocopy::{AsBytes, ByteSlice, ByteSliceMut, FromBytes, LayoutVerified}; 4 | 5 | use crate::disk::PageId; 6 | use super::Key; 7 | 8 | #[derive(Debug, FromBytes, AsBytes)] 9 | #[repr(C)] 10 | pub struct Header { 11 | num_pairs: u16, 12 | } 13 | 14 | pub struct Pair { 15 | data: T, 16 | } 17 | 18 | impl Pair<()> { 19 | const SIZE: usize = size_of::() + size_of::(); 20 | 21 | fn offset(index: usize) -> usize { 22 | index as usize * Self::SIZE 23 | } 24 | 25 | fn range(range: Range) -> Range { 26 | Self::offset(range.start)..Self::offset(range.end) 27 | } 28 | } 29 | 30 | impl<'a> Pair<&'a [u8]> { 31 | fn read(slice: &'a [u8], index: usize) -> Self { 32 | Pair { 33 | data: &slice[Pair::range(index..index + 1)] 34 | } 35 | } 36 | } 37 | 38 | impl<'a> Pair<&'a mut [u8]> { 39 | fn read_mut(slice: &'a mut [u8], index: usize) -> Self { 40 | Pair { 
41 | data: &mut slice[Pair::range(index..index + 1)] 42 | } 43 | } 44 | } 45 | 46 | impl Pair 47 | where 48 | T: Deref 49 | { 50 | pub fn key(&self) -> Key { 51 | self.data[..size_of::()].try_into().unwrap() 52 | } 53 | 54 | pub fn child(&self) -> PageId { 55 | let bytes: [u8; 8] = self.data[size_of::()..].try_into().unwrap(); 56 | bytes.into() 57 | } 58 | } 59 | 60 | impl Pair 61 | where 62 | T: DerefMut 63 | { 64 | pub fn set_key(&mut self, key: Key) { 65 | self.data[..size_of::()].copy_from_slice(&key); 66 | } 67 | 68 | pub fn set_child(&mut self, child: PageId) { 69 | let bytes: [u8; 8] = child.into(); 70 | self.data[size_of::()..].copy_from_slice(&bytes); 71 | } 72 | } 73 | 74 | pub struct Branch { 75 | header: LayoutVerified, 76 | body: B, 77 | } 78 | 79 | impl Branch { 80 | pub fn new(bytes: B) -> Option { 81 | let (header, body) = LayoutVerified::new_from_prefix(bytes)?; 82 | Some(Self { header, body }) 83 | } 84 | 85 | pub fn pair(&self, index: usize) -> Pair<&[u8]> { 86 | Pair::read(&self.body, index) 87 | } 88 | 89 | pub fn max_pairs(&self) -> usize { 90 | self.body.len() / Pair::SIZE 91 | } 92 | 93 | pub fn num_pairs(&self) -> usize { 94 | self.header.num_pairs as usize 95 | } 96 | 97 | pub fn find(&self, key: Key) -> usize { 98 | use std::cmp::Ordering::{Equal, Greater}; 99 | let mut base = 1usize; 100 | let mut size = self.num_pairs() - 1; 101 | while size > 1 { 102 | let half = size / 2; 103 | let mid = base + half; 104 | base = if self.pair(mid).key() > key { 105 | base 106 | } else { 107 | mid 108 | }; 109 | size -= half; 110 | } 111 | let cmp = self.pair(base).key().cmp(&key); 112 | if cmp == Equal { 113 | base 114 | } else { 115 | base - (cmp == Greater) as usize 116 | } 117 | } 118 | } 119 | 120 | impl Branch { 121 | pub fn initialize(&mut self, key: Key, left_child: PageId, right_child: PageId) { 122 | self.header.num_pairs = 2; 123 | self.pair_mut(0).set_child(left_child); 124 | let mut right = self.pair_mut(1); 125 | right.set_key(key); 126 | 
right.set_child(right_child); 127 | } 128 | 129 | pub fn pair_mut(&mut self, index: usize) -> Pair<&mut [u8]> { 130 | Pair::read_mut(&mut self.body, index) 131 | } 132 | 133 | pub fn insert(&mut self, index: usize, key: Key, child: PageId) { 134 | let num_children = self.num_pairs(); 135 | self.body.copy_within(Pair::range(index..num_children), Pair::offset(index + 1)); 136 | let mut pair = self.pair_mut(index); 137 | pair.set_key(key); 138 | pair.set_child(child); 139 | self.header.num_pairs += 1; 140 | } 141 | 142 | pub fn split(&mut self, new_branch: &mut Branch) -> Key { 143 | let num_keys = self.num_pairs(); 144 | let mid = num_keys / 2; 145 | let mid_key = self.pair(mid).key(); 146 | let src = &self.body[Pair::range(mid..num_keys)]; 147 | new_branch.body[0..src.len()].copy_from_slice(&src); 148 | new_branch.header.num_pairs = (num_keys - mid) as u16; 149 | self.header.num_pairs = (mid - 1) as u16; 150 | mid_key 151 | } 152 | } 153 | 154 | #[cfg(test)] 155 | mod tests { 156 | use super::*; 157 | 158 | #[test] 159 | fn test_insert_find() { 160 | let mut data = vec![0u8; 100]; 161 | let mut branch = Branch::new(data.as_mut_slice()).unwrap(); 162 | branch.initialize(5u64.to_be_bytes(), PageId(1), PageId(2)); 163 | branch.insert(2, 8u64.to_be_bytes(), PageId(3)); 164 | branch.insert(3, 11u64.to_be_bytes(), PageId(4)); 165 | assert_eq!(0, branch.find(1u64.to_be_bytes())); 166 | assert_eq!(1, branch.find(5u64.to_be_bytes())); 167 | assert_eq!(1, branch.find(6u64.to_be_bytes())); 168 | assert_eq!(2, branch.find(8u64.to_be_bytes())); 169 | assert_eq!(2, branch.find(10u64.to_be_bytes())); 170 | assert_eq!(3, branch.find(11u64.to_be_bytes())); 171 | assert_eq!(3, branch.find(12u64.to_be_bytes())); 172 | } 173 | 174 | #[test] 175 | fn test_split() { 176 | let mut data = vec![0u8; 100]; 177 | let mut branch = Branch::new(data.as_mut_slice()).unwrap(); 178 | branch.initialize(5u64.to_be_bytes(), PageId(1), PageId(2)); 179 | branch.insert(2, 8u64.to_be_bytes(), PageId(3)); 
180 | branch.insert(3, 11u64.to_be_bytes(), PageId(4)); 181 | let mut data2 = vec![0u8; 100]; 182 | let mut branch2 = Branch::new(data2.as_mut_slice()).unwrap(); 183 | let mid_key = branch.split(&mut branch2); 184 | assert_eq!(8u64.to_be_bytes(), mid_key); 185 | assert_eq!(0, branch.find(1u64.to_be_bytes())); 186 | assert_eq!(1, branch.find(5u64.to_be_bytes())); 187 | assert_eq!(1, branch.find(6u64.to_be_bytes())); 188 | assert_eq!(1, branch.find(8u64.to_be_bytes())); 189 | 190 | assert_eq!(0, branch2.find(9u64.to_be_bytes())); 191 | assert_eq!(1, branch2.find(11u64.to_be_bytes())); 192 | assert_eq!(1, branch2.find(12u64.to_be_bytes())); 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /src/btree/leaf.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | mem::size_of, 3 | }; 4 | 5 | use zerocopy::{AsBytes, ByteSlice, ByteSliceMut, FromBytes, LayoutVerified}; 6 | 7 | use super::Key; 8 | use crate::disk::PageId; 9 | use crate::slotted::{self, Slotted}; 10 | 11 | #[derive(Debug, FromBytes, AsBytes)] 12 | #[repr(C)] 13 | pub struct Header { 14 | prev_page_id: PageId, 15 | next_page_id: PageId, 16 | } 17 | pub struct Record { 18 | key: LayoutVerified, 19 | pub value: B, 20 | } 21 | 22 | impl Record { 23 | pub fn new(bytes: B) -> Option { 24 | let (key, value) = LayoutVerified::new_from_prefix(bytes)?; 25 | Some(Self { key, value }) 26 | } 27 | 28 | pub fn len(&self) -> usize { 29 | size_of::() + self.value.len() 30 | } 31 | 32 | pub fn key(&self) -> Key { 33 | let mut key = Key::default(); 34 | key.copy_from_slice(&self.key[..]); 35 | key 36 | } 37 | } 38 | 39 | pub struct Leaf { 40 | header: LayoutVerified, 41 | body: Slotted, 42 | } 43 | 44 | impl Leaf { 45 | pub fn new(bytes: B) -> Option { 46 | let (header, body) = LayoutVerified::new_from_prefix(bytes)?; 47 | let body = Slotted::new(body)?; 48 | Some(Self { header, body }) 49 | } 50 | 51 | pub fn 
prev_page_id(&self) -> Option { 52 | self.header.prev_page_id.valid() 53 | } 54 | 55 | pub fn next_page_id(&self) -> Option { 56 | self.header.next_page_id.valid() 57 | } 58 | 59 | pub fn num_records(&self) -> usize { 60 | self.body.num_slots() 61 | } 62 | 63 | pub fn find(&self, key: Key) -> Result { 64 | use std::cmp::Ordering::{Equal, Less}; 65 | if self.num_records() == 0 { 66 | return Err(0); 67 | } 68 | let mut base = 0; 69 | let mut size = self.num_records(); 70 | while size > 1 { 71 | let half = size / 2; 72 | let mid = base + half; 73 | base = if self.record(mid).key.as_ref() > key.as_ref() { 74 | base 75 | } else { 76 | mid 77 | }; 78 | size -= half; 79 | } 80 | let cmp = self.record(base).key.cmp(&key); 81 | if cmp == Equal { 82 | Ok(base) 83 | } else { 84 | Err(base + (cmp == Less) as usize) 85 | } 86 | } 87 | 88 | pub fn get(&self, key: Key) -> Option<&[u8]> { 89 | let slot_id = self.find(key).ok()?; 90 | Some(&self.record(slot_id).value) 91 | } 92 | 93 | pub fn record(&self, slot_id: usize) -> Record<&[u8]> { 94 | Record::new(&self.body[slot_id]).unwrap() 95 | } 96 | 97 | pub fn max_value_size(&self) -> usize { 98 | self.body.capacity() / 2 - size_of::() - size_of::() 99 | } 100 | } 101 | 102 | impl Leaf { 103 | pub fn initialize(&mut self) { 104 | self.header.prev_page_id = PageId::INVALID_PAGE_ID; 105 | self.header.next_page_id = PageId::INVALID_PAGE_ID; 106 | self.body.initialize(); 107 | } 108 | 109 | pub fn set_prev_page_id(&mut self, prev_page_id: Option) { 110 | self.header.prev_page_id = prev_page_id.into() 111 | } 112 | 113 | pub fn set_next_page_id(&mut self, next_page_id: Option) { 114 | self.header.next_page_id = next_page_id.into() 115 | } 116 | 117 | fn record_mut(&mut self, slot_id: usize) -> Record<&mut [u8]> { 118 | Record::new(&mut self.body[slot_id]).unwrap() 119 | } 120 | 121 | #[must_use = "insertion may fail"] 122 | pub fn put(&mut self, key: Key, value: &[u8]) -> bool { 123 | assert!(value.len() <= self.max_value_size()); 124 | 
match self.find(key) { 125 | Ok(index) => { 126 | if self 127 | .body 128 | .resize(index, size_of::() + value.len()) 129 | .is_some() 130 | { 131 | let mut record = self.record_mut(index); 132 | record.key.copy_from_slice(&key); 133 | record.value.copy_from_slice(value); 134 | return true; 135 | } 136 | } 137 | Err(index) => { 138 | if self.body.insert(index, size_of::() + value.len()).is_some() { 139 | let mut record = self.record_mut(index); 140 | record.key.copy_from_slice(&key); 141 | record.value.copy_from_slice(value); 142 | return true; 143 | } 144 | } 145 | } 146 | false 147 | } 148 | 149 | fn allocate_last(&mut self, len: usize) -> Record<&mut [u8]> { 150 | let next = self.num_records(); 151 | self.body.insert(next, len).unwrap(); 152 | self.record_mut(next) 153 | } 154 | 155 | fn push_record(&mut self, record: &Record<&[u8]>) { 156 | let mut target = self.allocate_last(record.len()); 157 | target.key.copy_from_slice(record.key.as_ref()); 158 | target.value.copy_from_slice(record.value); 159 | } 160 | 161 | fn push_key_value(&mut self, key: Key, value: &[u8]) { 162 | let record = Record { 163 | key: LayoutVerified::new(&key[..]).unwrap(), 164 | value 165 | }; 166 | self.push_record(&record); 167 | } 168 | 169 | pub fn split_put(&mut self, new_leaf: &mut Leaf, new_key: Key, new_value: &[u8]) -> Key { 170 | use std::cmp::Ordering; 171 | loop { 172 | if self.body.free_space() > new_leaf.body.free_space() { 173 | break; 174 | } 175 | let num_records = self.num_records(); 176 | if num_records <= 1 { 177 | break; 178 | } 179 | let last = num_records - 1; 180 | let record = self.record(last); 181 | let cmp = new_key.cmp(&record.key); 182 | if cmp == Ordering::Less { 183 | new_leaf.push_record(&record); 184 | self.body.remove(last); 185 | } else { 186 | new_leaf.push_key_value(new_key, new_value); 187 | if cmp == Ordering::Equal { 188 | self.body.remove(last); 189 | } 190 | loop { 191 | if self.body.free_space() > new_leaf.body.free_space() { 192 | break; 193 | } 
194 | let num_records = self.num_records(); 195 | if num_records <= 1 { 196 | break; 197 | } 198 | let last = num_records - 1; 199 | let record = self.record(last); 200 | new_leaf.push_record(&record); 201 | self.body.remove(last); 202 | } 203 | new_leaf.body.reverse(); 204 | let first = new_leaf.record(0); 205 | return first.key(); 206 | } 207 | } 208 | new_leaf.body.reverse(); 209 | assert!(self.put(new_key, new_value)); 210 | let first = new_leaf.record(0); 211 | first.key() 212 | } 213 | } 214 | 215 | #[cfg(test)] 216 | mod tests { 217 | use super::*; 218 | 219 | #[test] 220 | fn test_leaf_find() { 221 | let mut page_data = vec![0; 100]; 222 | let mut leaf_page = Leaf::new(page_data.as_mut_slice()).unwrap(); 223 | leaf_page.initialize(); 224 | leaf_page.body.insert(0, 8).unwrap(); 225 | leaf_page.body.insert(1, 8).unwrap(); 226 | leaf_page.body.insert(2, 8).unwrap(); 227 | leaf_page.body[0].copy_from_slice(b"deadbeef"); 228 | leaf_page.body[1].copy_from_slice(b"deadbeeh"); 229 | leaf_page.body[2].copy_from_slice(b"deadbeek"); 230 | assert_eq!(Ok(1), leaf_page.find(*b"deadbeeh")); 231 | assert_eq!(Err(1), leaf_page.find(*b"deadbeeg")); 232 | assert_eq!(Err(3), leaf_page.find(*b"deadbeez")); 233 | } 234 | 235 | #[test] 236 | fn test_leaf_insert() { 237 | let mut page_data = vec![0; 100]; 238 | let mut leaf_page = Leaf::new(page_data.as_mut_slice()).unwrap(); 239 | leaf_page.initialize(); 240 | assert!(leaf_page.put(*b"deadbeef", b"world")); 241 | assert!(leaf_page.put(*b"facebook", b"!")); 242 | assert!(leaf_page.put(*b"beefdead", b"hello")); 243 | assert_eq!(Some(&b"hello"[..]), leaf_page.get(*b"beefdead")); 244 | } 245 | 246 | #[test] 247 | fn test_leaf_split_insert() { 248 | let mut page_data = vec![0; 54]; 249 | let mut leaf_page = Leaf::new(page_data.as_mut_slice()).unwrap(); 250 | leaf_page.initialize(); 251 | assert!(leaf_page.put(*b"deadbeef", b"world")); 252 | assert!(leaf_page.put(*b"facebook", b"!")); 253 | assert!(!leaf_page.put(*b"beefdead", 
b"hello")); 254 | let mut leaf_page = Leaf::new(page_data.as_mut_slice()).unwrap(); 255 | let mut new_page_data = vec![0; 54]; 256 | let mut new_leaf_page = Leaf::new(new_page_data.as_mut_slice()).unwrap(); 257 | new_leaf_page.initialize(); 258 | leaf_page.split_put(&mut new_leaf_page, *b"beefdead", b"hello"); 259 | assert_eq!(Some(&b"world"[..]), leaf_page.get(*b"deadbeef")); 260 | } 261 | } 262 | -------------------------------------------------------------------------------- /src/btree/node.rs: -------------------------------------------------------------------------------- 1 | use zerocopy::{AsBytes, ByteSlice, ByteSliceMut, FromBytes, LayoutVerified}; 2 | 3 | use super::branch::Branch; 4 | use super::leaf::Leaf; 5 | 6 | #[derive(Debug, Clone, Copy, Eq, PartialEq)] 7 | #[repr(u8)] 8 | pub enum NodeType { 9 | Leaf = 1, 10 | Branch = 2, 11 | } 12 | 13 | #[derive(Debug, FromBytes, AsBytes)] 14 | #[repr(C)] 15 | pub struct Header { 16 | node_type: u8, 17 | _pad: [u8; 7], 18 | } 19 | 20 | impl Header { 21 | fn node_type(&self) -> NodeType { 22 | if self.node_type == NodeType::Leaf as u8 { 23 | return NodeType::Leaf; 24 | } 25 | if self.node_type == NodeType::Branch as u8 { 26 | return NodeType::Branch; 27 | } 28 | unreachable!() 29 | } 30 | 31 | fn set_node_type(&mut self, node_type: NodeType) { 32 | self.node_type = node_type as u8; 33 | } 34 | } 35 | 36 | pub struct NodePage { 37 | header: LayoutVerified, 38 | body: B, 39 | } 40 | 41 | impl NodePage { 42 | pub fn new(bytes: B) -> Option { 43 | let (header, body) = LayoutVerified::new_from_prefix(bytes)?; 44 | Some(Self { header, body }) 45 | } 46 | 47 | pub fn node(&self) -> Node<&[u8]> { 48 | match self.header.node_type() { 49 | NodeType::Leaf => Node::Leaf(Leaf::new(self.body.deref()).unwrap()), 50 | NodeType::Branch => Node::Branch(Branch::new(self.body.deref()).unwrap()), 51 | } 52 | } 53 | } 54 | 55 | impl NodePage { 56 | pub fn initialize_as_leaf(&mut self) -> Leaf<&mut [u8]> { 57 | 
self.header.set_node_type(NodeType::Leaf); 58 | Leaf::new(self.body.deref_mut()).unwrap() 59 | } 60 | 61 | pub fn initialize_as_branch(&mut self) -> Branch<&mut [u8]> { 62 | self.header.set_node_type(NodeType::Branch); 63 | Branch::new(self.body.deref_mut()).unwrap() 64 | } 65 | 66 | pub fn node_mut(&mut self) -> Node<&mut [u8]> { 67 | match self.header.node_type() { 68 | NodeType::Leaf => Node::Leaf(Leaf::new(self.body.deref_mut()).unwrap()), 69 | NodeType::Branch => Node::Branch(Branch::new(self.body.deref_mut()).unwrap()), 70 | } 71 | } 72 | } 73 | 74 | pub enum Node { 75 | Leaf(Leaf), 76 | Branch(Branch), 77 | } 78 | 79 | impl Node { 80 | pub fn try_into_leaf(self) -> Result, Self> { 81 | match self { 82 | Node::Leaf(leaf) => Ok(leaf), 83 | _ => Err(self), 84 | } 85 | } 86 | 87 | #[allow(dead_code)] 88 | pub fn try_into_branch(self) -> Result, Self> { 89 | match self { 90 | Node::Branch(branch) => Ok(branch), 91 | _ => Err(self), 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/buffer.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::HashMap, io, sync::Arc}; 2 | 3 | use parking_lot::{Mutex, RwLock}; 4 | use thiserror::Error; 5 | 6 | use crate::disk::{DiskManager, PageId, PAGE_SIZE}; 7 | 8 | pub type Page = [u8; PAGE_SIZE]; 9 | 10 | #[derive(Debug, Error)] 11 | pub enum Error { 12 | #[error(transparent)] 13 | Io(#[from] io::Error), 14 | #[error("no free buffer available in buffer pool")] 15 | NoFreeBuffer, 16 | } 17 | 18 | #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] 19 | pub struct BufferId(usize); 20 | 21 | #[derive(Debug)] 22 | pub struct Buffer { 23 | pub page: Page, 24 | pub is_dirty: bool, 25 | } 26 | 27 | impl Default for Buffer { 28 | fn default() -> Self { 29 | Self { 30 | page: [0u8; PAGE_SIZE], 31 | is_dirty: false, 32 | } 33 | } 34 | } 35 | 36 | #[derive(Debug, Default)] 37 | pub struct Frame { 38 | usage_count: u64, 39 | 
page_id: PageId, 40 | buffer: Arc>, 41 | } 42 | 43 | pub struct BufferPool { 44 | page_table: HashMap, 45 | next_victim: usize, 46 | buffers: Vec, 47 | } 48 | 49 | impl BufferPool { 50 | pub fn new(pool_size: usize) -> Self { 51 | let page_table = HashMap::new(); 52 | let next_victim = 0; 53 | let mut buffers = vec![]; 54 | buffers.resize_with(pool_size, Default::default); 55 | Self { 56 | page_table, 57 | next_victim, 58 | buffers, 59 | } 60 | } 61 | 62 | fn evict(&mut self) -> Option<(BufferId, &mut Frame)> { 63 | let pool_size = self.buffers.len(); 64 | let mut consecutive_used = 0; 65 | let victim_idx = loop { 66 | let frame = &mut self.buffers[self.next_victim]; 67 | if frame.usage_count == 0 { 68 | break self.next_victim; 69 | } 70 | if Arc::get_mut(&mut frame.buffer).is_some() { 71 | frame.usage_count -= 1; 72 | consecutive_used = 0; 73 | } else { 74 | consecutive_used += 1; 75 | if consecutive_used >= pool_size { 76 | return None; 77 | } 78 | } 79 | self.next_victim = (self.next_victim + 1) % pool_size; 80 | }; 81 | let frame = &mut self.buffers[victim_idx]; 82 | frame.usage_count = 1; 83 | let victim_page_id = frame.page_id; 84 | self.page_table.remove(&victim_page_id); 85 | Some((BufferId(victim_idx), frame)) 86 | } 87 | } 88 | 89 | pub struct BufferPoolManager { 90 | disk: Mutex, 91 | pool: Mutex, 92 | } 93 | 94 | impl BufferPoolManager { 95 | pub fn new(disk: DiskManager, pool: BufferPool) -> Self { 96 | Self { 97 | disk: Mutex::new(disk), 98 | pool: Mutex::new(pool), 99 | } 100 | } 101 | 102 | pub fn fetch_page(&self, page_id: PageId) -> Result>, Error> { 103 | let mut locked_pool = self.pool.lock(); 104 | if let Some(&frame_id) = locked_pool.page_table.get(&page_id) { 105 | let frame = &mut locked_pool.buffers[frame_id.0]; 106 | frame.usage_count += 1; 107 | return Ok(frame.buffer.clone()); 108 | } 109 | let (frame_id, frame) = locked_pool.evict().ok_or(Error::NoFreeBuffer)?; 110 | let evict_page_id = frame.page_id; 111 | { 112 | let buffer = 
Arc::get_mut(&mut frame.buffer).unwrap().get_mut(); 113 | let mut locked_disk = self.disk.lock(); 114 | if buffer.is_dirty { 115 | locked_disk.write_page_data(evict_page_id, &buffer.page)?; 116 | } 117 | frame.page_id = page_id; 118 | buffer.is_dirty = false; 119 | locked_disk.read_page_data(page_id, &mut buffer.page)?; 120 | } 121 | let page = Arc::clone(&frame.buffer); 122 | locked_pool.page_table.remove(&evict_page_id); 123 | locked_pool.page_table.insert(page_id, frame_id); 124 | Ok(page) 125 | } 126 | 127 | pub fn create_page(&self) -> Result<(PageId, Arc>), Error> { 128 | let mut locked_pool = self.pool.lock(); 129 | let (frame_id, frame) = locked_pool.evict().ok_or(Error::NoFreeBuffer)?; 130 | let evict_page_id = frame.page_id; 131 | let page_id = { 132 | let buffer = Arc::get_mut(&mut frame.buffer).unwrap().get_mut(); 133 | let mut locked_disk = self.disk.lock(); 134 | if buffer.is_dirty { 135 | locked_disk.write_page_data(evict_page_id, &buffer.page)?; 136 | } 137 | let page_id = locked_disk.allocate_page(); 138 | frame.page_id = page_id; 139 | *buffer = Buffer::default(); 140 | buffer.is_dirty = true; 141 | page_id 142 | }; 143 | let buffer = Arc::clone(&frame.buffer); 144 | locked_pool.page_table.remove(&evict_page_id); 145 | locked_pool.page_table.insert(page_id, frame_id); 146 | Ok((page_id, buffer)) 147 | } 148 | 149 | pub fn flush(&self) -> Result<(), Error> { 150 | let locked_pool = self.pool.lock(); 151 | let mut locked_disk = self.disk.lock(); 152 | for (page_id, frame_id) in locked_pool.page_table.iter() { 153 | let frame = &locked_pool.buffers[frame_id.0]; 154 | let mut rw_buffer = frame.buffer.write(); 155 | locked_disk.write_page_data(*page_id, &rw_buffer.page)?; 156 | rw_buffer.is_dirty = false; 157 | } 158 | locked_disk.flush()?; 159 | Ok(()) 160 | } 161 | } 162 | 163 | #[cfg(test)] 164 | mod tests { 165 | use super::*; 166 | use tempfile::tempfile; 167 | 168 | #[test] 169 | fn test() { 170 | let mut hello = Vec::with_capacity(PAGE_SIZE); 
171 | hello.extend_from_slice(b"hello"); 172 | hello.resize(PAGE_SIZE, 0); 173 | let mut world = Vec::with_capacity(PAGE_SIZE); 174 | world.extend_from_slice(b"world"); 175 | world.resize(PAGE_SIZE, 0); 176 | 177 | let disk = DiskManager::new(tempfile().unwrap()).unwrap(); 178 | let pool = BufferPool::new(1); 179 | let bufmgr = BufferPoolManager::new(disk, pool); 180 | let page1_id = { 181 | let (page_id, buffer) = bufmgr.create_page().unwrap(); 182 | assert!(bufmgr.create_page().is_err()); 183 | let mut rw_buffer = buffer.write(); 184 | rw_buffer.page.copy_from_slice(&hello); 185 | rw_buffer.is_dirty = true; 186 | page_id 187 | }; 188 | { 189 | let buffer = bufmgr.fetch_page(page1_id).unwrap(); 190 | let ro_buffer = buffer.read(); 191 | assert_eq!(&hello, &ro_buffer.page); 192 | } 193 | let page2_id = { 194 | let (page_id, buffer) = bufmgr.create_page().unwrap(); 195 | let mut rw_buffer = buffer.write(); 196 | rw_buffer.page.copy_from_slice(&world); 197 | rw_buffer.is_dirty = true; 198 | page_id 199 | }; 200 | { 201 | let buffer = bufmgr.fetch_page(page1_id).unwrap(); 202 | let ro_buffer = buffer.read(); 203 | assert_eq!(&hello, &ro_buffer.page); 204 | } 205 | { 206 | let buffer = bufmgr.fetch_page(page2_id).unwrap(); 207 | let ro_buffer = buffer.read(); 208 | assert_eq!(&world, &ro_buffer.page); 209 | } 210 | } 211 | } 212 | -------------------------------------------------------------------------------- /src/disk.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | convert::{TryFrom, TryInto}, 3 | io::{prelude::*, SeekFrom}, 4 | }; 5 | use std::{fs::File, fs::OpenOptions, path::Path}; 6 | 7 | use zerocopy::{AsBytes, FromBytes}; 8 | 9 | pub const PAGE_SIZE: usize = 4096; 10 | 11 | #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash, FromBytes, AsBytes)] 12 | #[repr(C)] 13 | pub struct PageId(pub u64); 14 | impl PageId { 15 | pub const CATALOG_PAGE_ID: PageId = PageId(0); 16 | pub const 
INVALID_PAGE_ID: PageId = PageId(u64::MAX); 17 | 18 | pub fn valid(self) -> Option { 19 | if self == Self::INVALID_PAGE_ID { 20 | None 21 | } else { 22 | Some(self) 23 | } 24 | } 25 | } 26 | impl Default for PageId { 27 | fn default() -> Self { 28 | Self::INVALID_PAGE_ID 29 | } 30 | } 31 | impl From<[u8; 8]> for PageId { 32 | fn from(bytes: [u8; 8]) -> Self { 33 | PageId(u64::from_be_bytes(bytes)) 34 | } 35 | } 36 | impl From for [u8; 8] { 37 | fn from(page_id: PageId) -> Self { 38 | page_id.0.to_be_bytes() 39 | } 40 | } 41 | impl From> for PageId { 42 | fn from(page_id: Option) -> Self { 43 | page_id.unwrap_or_default() 44 | } 45 | } 46 | impl<'a> TryFrom<&'a [u8]> for PageId { 47 | type Error = std::array::TryFromSliceError; 48 | 49 | fn try_from(value: &'a [u8]) -> Result { 50 | let array: [u8; 8] = value.try_into()?; 51 | Ok(array.into()) 52 | } 53 | } 54 | 55 | pub struct DiskManager { 56 | data_file: File, 57 | next_page_id: u64, 58 | } 59 | 60 | impl DiskManager { 61 | pub fn new(data_file: File) -> std::io::Result { 62 | let next_page_id = data_file.metadata()?.len() / PAGE_SIZE as u64; 63 | Ok(Self { 64 | data_file, 65 | next_page_id, 66 | }) 67 | } 68 | 69 | pub fn open(data_file_path: impl AsRef) -> std::io::Result { 70 | let data_file = OpenOptions::new() 71 | .read(true) 72 | .write(true) 73 | .create(true) 74 | .open(data_file_path)?; 75 | Self::new(data_file) 76 | } 77 | 78 | pub fn read_page_data(&mut self, page_id: PageId, data: &mut [u8]) -> std::io::Result<()> { 79 | let offset = PAGE_SIZE as u64 * page_id.0; 80 | self.data_file.seek(SeekFrom::Start(offset))?; 81 | self.data_file.read_exact(data) 82 | } 83 | 84 | pub fn write_page_data(&mut self, page_id: PageId, data: &[u8]) -> std::io::Result<()> { 85 | let offset = PAGE_SIZE as u64 * page_id.0; 86 | self.data_file.seek(SeekFrom::Start(offset))?; 87 | self.data_file.write_all(data) 88 | } 89 | 90 | pub fn flush(&mut self) -> std::io::Result<()> { 91 | self.data_file.flush()?; 92 | 
self.data_file.sync_all() 93 | } 94 | 95 | pub fn allocate_page(&mut self) -> PageId { 96 | let page_id = self.next_page_id; 97 | self.next_page_id += 1; 98 | PageId(page_id) 99 | } 100 | } 101 | 102 | #[cfg(test)] 103 | mod tests { 104 | use super::*; 105 | use tempfile::NamedTempFile; 106 | 107 | #[test] 108 | fn test() { 109 | let (data_file, data_file_path) = NamedTempFile::new().unwrap().into_parts(); 110 | let mut disk = DiskManager::new(data_file).unwrap(); 111 | let mut hello = Vec::with_capacity(PAGE_SIZE); 112 | hello.extend_from_slice(b"hello"); 113 | hello.resize(PAGE_SIZE, 0); 114 | let hello_page_id = disk.allocate_page(); 115 | disk.write_page_data(hello_page_id, &hello).unwrap(); 116 | let mut world = Vec::with_capacity(PAGE_SIZE); 117 | world.extend_from_slice(b"world"); 118 | world.resize(PAGE_SIZE, 0); 119 | let world_page_id = disk.allocate_page(); 120 | disk.write_page_data(world_page_id, &world).unwrap(); 121 | drop(disk); 122 | let mut disk2 = DiskManager::open(&data_file_path).unwrap(); 123 | let mut buf = vec![0; PAGE_SIZE]; 124 | disk2.read_page_data(hello_page_id, &mut buf).unwrap(); 125 | assert_eq!(hello, buf); 126 | disk2.read_page_data(world_page_id, &mut buf).unwrap(); 127 | assert_eq!(world, buf); 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /src/executor.rs: -------------------------------------------------------------------------------- 1 | use std::{convert::TryInto, sync::Arc}; 2 | 3 | use crate::{ 4 | btree, 5 | buffer::BufferPoolManager, 6 | disk::PageId, 7 | query::CreateTableOutput, 8 | query::FlushInput, 9 | query::{ 10 | self, CreateTableInput, DeleteItemInput, DeleteItemOutput, FlushOutput, GetItemInput, 11 | GetItemOutput, PutItemInput, PutItemOutput, Request, Response, ScanItemInput, 12 | ScanItemOutput, 13 | }, 14 | }; 15 | 16 | pub struct Executor { 17 | bufmgr: Arc, 18 | } 19 | 20 | impl Executor { 21 | pub fn new(bufmgr: Arc) -> Self { 22 | Self { bufmgr } 23 
| } 24 | 25 | pub fn execute(&self, request: Request) -> query::Response { 26 | let resp = match request { 27 | Request::GetItem(input) => self.get_item(input).map(Response::GetItem), 28 | Request::PutItem(input) => self.put_item(input).map(Response::PutItem), 29 | Request::DeleteItem(input) => self.delete_item(input).map(Response::DeleteItem), 30 | Request::CreateTable(input) => self.create_table(input).map(Response::CreateTable), 31 | Request::ScanItem(input) => self.scan_item(input).map(Response::ScanItem), 32 | Request::Flush(input) => self.flush(input).map(Response::Flush), 33 | }; 34 | resp.map_err(|err| match err.downcast_ref::() { 35 | Some(btree::Error::Deadlock) => query::Error::Deadlock, 36 | _ => query::Error::Other { 37 | message: err.to_string(), 38 | }, 39 | }) 40 | .unwrap_or_else(Response::Error) 41 | } 42 | 43 | fn lookup_table(&self, table_id: btree::Key) -> Result { 44 | let catalog = btree::Access::open(&self.bufmgr, PageId::CATALOG_PAGE_ID); 45 | let mut buf = vec![]; 46 | if !catalog.get(table_id, &mut buf)? { 47 | return Err(anyhow::anyhow!("no such table")); 48 | } 49 | Ok(buf[..].try_into()?) 50 | } 51 | 52 | fn get_item(&self, input: GetItemInput) -> Result { 53 | let page_id = self.lookup_table(input.table_id.into())?; 54 | let table_access = btree::Access::open(&self.bufmgr, page_id); 55 | let mut buf = vec![]; 56 | if !table_access.get(input.key.into(), &mut buf)? 
{ 57 | return Ok(GetItemOutput { item: None }); 58 | } 59 | let item = query::Item { 60 | key: input.key, 61 | value: String::from_utf8(buf)?, 62 | }; 63 | Ok(GetItemOutput { item: Some(item) }) 64 | } 65 | 66 | fn put_item(&self, input: PutItemInput) -> Result { 67 | let page_id = self.lookup_table(input.table_id.into())?; 68 | let table_access = btree::Access::open(&self.bufmgr, page_id); 69 | table_access.put(input.item.key.into(), input.item.value.as_bytes())?; 70 | Ok(PutItemOutput) 71 | } 72 | 73 | fn delete_item(&self, _input: DeleteItemInput) -> Result { 74 | todo!(); 75 | } 76 | 77 | fn scan_item(&self, input: ScanItemInput) -> Result { 78 | let page_id = self.lookup_table(input.table_id.into())?; 79 | let table_access = btree::Access::open(&self.bufmgr, page_id); 80 | let mut items = vec![]; 81 | let mut buf = vec![]; 82 | let mut count = 0; 83 | if input.backward { 84 | let mut iter = table_access.iter_rev(input.start.map(Into::into))?; 85 | while let Some(key) = iter.next(&mut buf)? { 86 | let key = key.into(); 87 | let value = String::from_utf8(buf.clone())?; 88 | buf.clear(); 89 | items.push(query::Item { key, value }); 90 | count += 1; 91 | if count >= input.limit { 92 | break; 93 | } 94 | } 95 | } else { 96 | let mut iter = table_access.iter(input.start.map(Into::into))?; 97 | while let Some(key) = iter.next(&mut buf)? 
{ 98 | let key = key.into(); 99 | let value = String::from_utf8(buf.clone())?; 100 | buf.clear(); 101 | items.push(query::Item { key, value }); 102 | count += 1; 103 | if count >= input.limit { 104 | break; 105 | } 106 | } 107 | } 108 | Ok(ScanItemOutput { items }) 109 | } 110 | 111 | fn create_table(&self, input: CreateTableInput) -> Result { 112 | let catalog = btree::Access::open(&self.bufmgr, PageId::CATALOG_PAGE_ID); 113 | let new_table = btree::Access::create(&self.bufmgr)?; 114 | let bytes: [u8; 8] = new_table.btree_page_id.into(); 115 | catalog.put(input.table_id.into(), &bytes)?; 116 | Ok(CreateTableOutput) 117 | } 118 | 119 | fn flush(&self, _input: FlushInput) -> Result { 120 | self.bufmgr.flush()?; 121 | Ok(FlushOutput) 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/latch.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | marker::PhantomData, 3 | ops::{Deref, DerefMut}, 4 | sync::Arc, 5 | }; 6 | 7 | use parking_lot::lock_api::{RawRwLock, RwLock}; 8 | 9 | pub trait OwnedRwLockExt 10 | where 11 | R: RawRwLock, 12 | { 13 | unsafe fn raw(&self) -> &R; 14 | unsafe fn read_guard_owned(self: Arc) -> OwnedRwLockReadGuard; 15 | unsafe fn write_guard_owned(self: Arc) -> OwnedRwLockWriteGuard; 16 | 17 | fn read_owned(self: Arc) -> OwnedRwLockReadGuard { 18 | unsafe { self.raw().lock_shared() }; 19 | unsafe { self.read_guard_owned() } 20 | } 21 | 22 | fn write_owned(self: Arc) -> OwnedRwLockWriteGuard { 23 | unsafe { self.raw().lock_exclusive() }; 24 | unsafe { self.write_guard_owned() } 25 | } 26 | 27 | fn try_read_owned(self: Arc) -> Option> { 28 | if unsafe { self.raw().try_lock_shared() } { 29 | Some(unsafe { self.read_guard_owned() }) 30 | } else { 31 | None 32 | } 33 | } 34 | 35 | fn try_write_owned(self: Arc) -> Option> { 36 | if unsafe { self.raw().try_lock_exclusive() } { 37 | Some(unsafe { self.write_guard_owned() }) 38 | } else { 39 | None 40 
| } 41 | } 42 | } 43 | 44 | impl OwnedRwLockExt for RwLock 45 | where 46 | R: RawRwLock, 47 | { 48 | unsafe fn raw(&self) -> &R { 49 | RwLock::raw(self) 50 | } 51 | 52 | unsafe fn read_guard_owned(self: Arc) -> OwnedRwLockReadGuard { 53 | OwnedRwLockReadGuard { 54 | rwlock: self, 55 | marker: PhantomData, 56 | } 57 | } 58 | 59 | unsafe fn write_guard_owned(self: Arc) -> OwnedRwLockWriteGuard { 60 | OwnedRwLockWriteGuard { 61 | rwlock: self, 62 | marker: PhantomData, 63 | } 64 | } 65 | } 66 | 67 | pub struct OwnedRwLockReadGuard 68 | where 69 | R: RawRwLock, 70 | { 71 | rwlock: Arc>, 72 | marker: PhantomData, 73 | } 74 | unsafe impl Sync for OwnedRwLockReadGuard {} 75 | 76 | impl Deref for OwnedRwLockReadGuard 77 | where 78 | R: RawRwLock, 79 | { 80 | type Target = T; 81 | 82 | fn deref(&self) -> &Self::Target { 83 | unsafe { 84 | self.rwlock 85 | .data_ptr() 86 | .as_ref() 87 | .expect("Arc must not point to null") 88 | } 89 | } 90 | } 91 | 92 | impl Drop for OwnedRwLockReadGuard 93 | where 94 | R: RawRwLock, 95 | { 96 | fn drop(&mut self) { 97 | unsafe { 98 | self.rwlock.raw().unlock_shared(); 99 | } 100 | } 101 | } 102 | 103 | pub struct OwnedRwLockWriteGuard 104 | where 105 | R: RawRwLock, 106 | { 107 | rwlock: Arc>, 108 | marker: PhantomData, 109 | } 110 | unsafe impl Sync for OwnedRwLockWriteGuard {} 111 | 112 | impl Deref for OwnedRwLockWriteGuard 113 | where 114 | R: RawRwLock, 115 | { 116 | type Target = T; 117 | 118 | fn deref(&self) -> &Self::Target { 119 | unsafe { 120 | self.rwlock 121 | .data_ptr() 122 | .as_ref() 123 | .expect("Arc must not point to null") 124 | } 125 | } 126 | } 127 | 128 | impl DerefMut for OwnedRwLockWriteGuard 129 | where 130 | R: RawRwLock, 131 | { 132 | fn deref_mut(&mut self) -> &mut Self::Target { 133 | unsafe { 134 | self.rwlock 135 | .data_ptr() 136 | .as_mut() 137 | .expect("Arc must not point to null") 138 | } 139 | } 140 | } 141 | 142 | impl Drop for OwnedRwLockWriteGuard 143 | where 144 | R: RawRwLock, 145 | { 146 | fn 
drop(&mut self) { 147 | unsafe { 148 | self.rwlock.raw().unlock_exclusive(); 149 | } 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | mod btree; 2 | mod buffer; 3 | mod disk; 4 | mod executor; 5 | mod latch; 6 | mod query; 7 | mod slotted; 8 | 9 | use std::env; 10 | use std::thread; 11 | use std::{ 12 | io::Write, 13 | io::{BufRead, BufReader}, 14 | net::TcpListener, 15 | net::TcpStream, 16 | sync::Arc, 17 | }; 18 | 19 | use buffer::{BufferPool, BufferPoolManager}; 20 | use disk::DiskManager; 21 | use executor::Executor; 22 | 23 | fn main() -> Result<(), anyhow::Error> { 24 | let mut args = env::args_os(); 25 | args.next(); 26 | 27 | let qp_filename = args.next().expect("qp filename is required"); 28 | let disk = DiskManager::open(qp_filename)?; 29 | let pool = BufferPool::new(5); 30 | let bufmgr = Arc::new(BufferPoolManager::new(disk, pool)); 31 | let listener = TcpListener::bind("0.0.0.0:8124")?; 32 | 33 | for stream in listener.incoming() { 34 | let stream = stream.unwrap(); 35 | let executor = Executor::new(bufmgr.clone()); 36 | thread::spawn(move || Handler::new(executor).handle(stream)); 37 | } 38 | 39 | Ok(()) 40 | } 41 | 42 | struct Handler { 43 | executor: Executor, 44 | } 45 | 46 | impl Handler { 47 | fn new(executor: Executor) -> Self { 48 | Self { executor } 49 | } 50 | 51 | fn handle(&self, stream: TcpStream) -> Result<(), anyhow::Error> { 52 | let buf_read = BufReader::new(&stream); 53 | for line in buf_read.lines() { 54 | let line = line?; 55 | let response = self.handle_request(&line).unwrap_or_else(|err| { 56 | query::Response::Error(query::Error::Other { 57 | message: err.to_string(), 58 | }) 59 | }); 60 | serde_json::to_writer(&stream, &response)?; 61 | (&stream).write_all(b"\n")? 
62 | } 63 | Ok(()) 64 | } 65 | 66 | fn handle_request(&self, line: &str) -> Result { 67 | let request: query::Request = serde_json::from_str(&line)?; 68 | Ok(self.executor.execute(request)) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/query.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | use crate::btree; 4 | 5 | #[derive(Debug, Deserialize)] 6 | #[serde(tag = "type")] 7 | pub enum Request { 8 | GetItem(GetItemInput), 9 | PutItem(PutItemInput), 10 | DeleteItem(DeleteItemInput), 11 | CreateTable(CreateTableInput), 12 | ScanItem(ScanItemInput), 13 | Flush(FlushInput), 14 | } 15 | 16 | #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] 17 | pub struct Key([u8; 8]); 18 | impl Serialize for Key { 19 | fn serialize(&self, serializer: S) -> Result 20 | where 21 | S: serde::Serializer, 22 | { 23 | hex::serialize_upper(self.0, serializer) 24 | } 25 | } 26 | 27 | impl<'de> Deserialize<'de> for Key { 28 | fn deserialize(deserializer: D) -> Result 29 | where 30 | D: serde::Deserializer<'de>, 31 | { 32 | Ok(Key(hex::deserialize(deserializer)?)) 33 | } 34 | } 35 | impl From for btree::Key { 36 | fn from(Key(bytes): Key) -> Self { 37 | bytes 38 | } 39 | } 40 | impl From for Key { 41 | fn from(bytes: btree::Key) -> Self { 42 | Key(bytes) 43 | } 44 | } 45 | 46 | #[derive(Debug, Serialize, Deserialize)] 47 | pub struct Item { 48 | pub key: Key, 49 | pub value: String, 50 | } 51 | 52 | #[derive(Debug, Deserialize)] 53 | pub struct GetItemInput { 54 | pub table_id: Key, 55 | pub key: Key, 56 | } 57 | 58 | #[derive(Debug, Deserialize)] 59 | pub struct PutItemInput { 60 | pub table_id: Key, 61 | pub item: Item, 62 | } 63 | 64 | #[derive(Debug, Deserialize)] 65 | pub struct DeleteItemInput { 66 | pub table_id: Key, 67 | pub key: Key, 68 | } 69 | 70 | #[derive(Debug, Deserialize)] 71 | pub struct ScanItemInput { 72 | pub table_id: Key, 73 | pub 
start: Option, 74 | pub backward: bool, 75 | pub limit: usize, 76 | } 77 | 78 | #[derive(Debug, Deserialize)] 79 | pub struct CreateTableInput { 80 | pub table_id: Key, 81 | } 82 | 83 | #[derive(Debug, Deserialize)] 84 | pub struct FlushInput; 85 | 86 | #[derive(Debug, Serialize)] 87 | #[serde(tag = "type")] 88 | pub enum Response { 89 | GetItem(GetItemOutput), 90 | PutItem(PutItemOutput), 91 | DeleteItem(DeleteItemOutput), 92 | ScanItem(ScanItemOutput), 93 | CreateTable(CreateTableOutput), 94 | Flush(FlushOutput), 95 | Error(Error), 96 | } 97 | 98 | #[derive(Debug, Serialize)] 99 | pub struct GetItemOutput { 100 | pub item: Option, 101 | } 102 | 103 | #[derive(Debug, Serialize)] 104 | pub struct PutItemOutput; 105 | 106 | #[derive(Debug, Serialize)] 107 | pub struct DeleteItemOutput { 108 | pub found: bool, 109 | } 110 | 111 | #[derive(Debug, Serialize)] 112 | pub struct ScanItemOutput { 113 | pub items: Vec, 114 | } 115 | 116 | #[derive(Debug, Serialize)] 117 | pub struct CreateTableOutput; 118 | 119 | #[derive(Debug, Serialize)] 120 | pub struct FlushOutput; 121 | 122 | #[derive(Debug, Serialize)] 123 | #[serde(tag = "error")] 124 | pub enum Error { 125 | Deadlock, 126 | Other { message: String }, 127 | } 128 | -------------------------------------------------------------------------------- /src/slotted.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | mem::size_of, 3 | ops::{Index, IndexMut, Range}, 4 | }; 5 | 6 | use zerocopy::{AsBytes, ByteSlice, ByteSliceMut, FromBytes, LayoutVerified}; 7 | 8 | #[derive(Debug, FromBytes, AsBytes)] 9 | #[repr(C)] 10 | pub struct Header { 11 | num_slots: u16, 12 | free_space_offset: u16, 13 | } 14 | 15 | #[derive(Debug, FromBytes, AsBytes, Clone, Copy)] 16 | #[repr(C)] 17 | pub struct Pointer { 18 | offset: u16, 19 | len: u16, 20 | } 21 | 22 | impl Pointer { 23 | fn range(&self) -> Range { 24 | let start = self.offset as usize; 25 | let end = start + self.len as usize; 26 | 
start..end 27 | } 28 | } 29 | 30 | pub type Pointers = LayoutVerified; 31 | 32 | pub struct Slotted { 33 | header: LayoutVerified, 34 | body: B, 35 | } 36 | 37 | impl Slotted { 38 | pub fn new(bytes: B) -> Option { 39 | let (header, body) = LayoutVerified::new_from_prefix(bytes)?; 40 | Some(Self { header, body }) 41 | } 42 | 43 | pub fn capacity(&self) -> usize { 44 | self.body.len() 45 | } 46 | 47 | pub fn num_slots(&self) -> usize { 48 | self.header.num_slots as usize 49 | } 50 | 51 | pub fn free_space(&self) -> usize { 52 | self.header.free_space_offset as usize - self.pointers_size() 53 | } 54 | 55 | fn pointers_size(&self) -> usize { 56 | size_of::() * self.num_slots() 57 | } 58 | 59 | fn pointers(&self) -> Pointers<&[u8]> { 60 | Pointers::new_slice(&self.body[..self.pointers_size()]).unwrap() 61 | } 62 | 63 | fn data(&self, pointer: Pointer) -> &[u8] { 64 | &self.body[pointer.range()] 65 | } 66 | } 67 | 68 | impl Slotted { 69 | pub fn initialize(&mut self) { 70 | self.header.num_slots = 0; 71 | self.header.free_space_offset = self.body.len() as u16; 72 | } 73 | 74 | fn pointers_mut(&mut self) -> Pointers<&mut [u8]> { 75 | let pointers_size = self.pointers_size(); 76 | Pointers::new_slice(&mut self.body[..pointers_size]).unwrap() 77 | } 78 | 79 | fn data_mut(&mut self, pointer: Pointer) -> &mut [u8] { 80 | &mut self.body[pointer.range()] 81 | } 82 | 83 | pub fn reverse(&mut self) { 84 | self.pointers_mut().reverse() 85 | } 86 | 87 | pub fn insert(&mut self, index: usize, len: usize) -> Option<()> { 88 | if self.free_space() < size_of::() + len { 89 | return None; 90 | } 91 | let num_slots_orig = self.num_slots(); 92 | self.header.free_space_offset -= len as u16; 93 | self.header.num_slots += 1; 94 | let free_space_offset = self.header.free_space_offset; 95 | let mut pointers_mut = self.pointers_mut(); 96 | pointers_mut.copy_within(index..num_slots_orig, index + 1); 97 | let pointer = &mut pointers_mut[index]; 98 | pointer.offset = free_space_offset; 99 | 
pointer.len = len as u16; 100 | Some(()) 101 | } 102 | 103 | pub fn remove(&mut self, index: usize) { 104 | self.resize(index, 0); 105 | self.pointers_mut() 106 | .copy_within(index + 1.., index); 107 | self.header.num_slots -= 1; 108 | } 109 | 110 | pub fn resize(&mut self, index: usize, len_new: usize) -> Option<()> { 111 | let pointers = self.pointers(); 112 | let len_orig = pointers[index].len; 113 | let len_incr = len_new as isize - len_orig as isize; 114 | if len_incr == 0 { 115 | return Some(()); 116 | } 117 | if len_incr > self.free_space() as isize { 118 | return None; 119 | } 120 | let free_space_offset = self.header.free_space_offset as usize; 121 | let offset_orig = pointers[index].offset; 122 | let shift_range = free_space_offset..offset_orig as usize; 123 | let free_space_offset_new = (free_space_offset as isize - len_incr) as usize; 124 | self.header.free_space_offset = free_space_offset_new as u16; 125 | self.body.as_bytes_mut().copy_within(shift_range, free_space_offset_new); 126 | let mut pointers_mut = self.pointers_mut(); 127 | for pointer in pointers_mut.iter_mut() { 128 | if pointer.offset <= offset_orig { 129 | pointer.offset = (pointer.offset as isize - len_incr) as u16; 130 | } 131 | } 132 | let pointer = &mut pointers_mut[index]; 133 | pointer.len = len_new as u16; 134 | if len_new == 0 { 135 | pointer.offset = free_space_offset_new as u16; 136 | } 137 | Some(()) 138 | } 139 | } 140 | 141 | impl Index for Slotted { 142 | type Output = [u8]; 143 | 144 | fn index(&self, index: usize) -> &Self::Output { 145 | self.data(self.pointers()[index]) 146 | } 147 | } 148 | 149 | impl IndexMut for Slotted { 150 | fn index_mut(&mut self, index: usize) -> &mut Self::Output { 151 | self.data_mut(self.pointers()[index]) 152 | } 153 | } 154 | 155 | #[cfg(test)] 156 | mod tests { 157 | use super::*; 158 | 159 | #[test] 160 | fn test() { 161 | let mut page_data = vec![0u8; 128]; 162 | let mut slotted = Slotted::new(page_data.as_mut_slice()).unwrap(); 163 | 
let insert = |slotted: &mut Slotted<&mut [u8]>, index: usize, buf: &[u8]| { 164 | slotted.insert(index, buf.len()).unwrap(); 165 | slotted[index].copy_from_slice(buf); 166 | }; 167 | let push = |slotted: &mut Slotted<&mut [u8]>, buf: &[u8]| { 168 | let index = slotted.num_slots() as usize; 169 | insert(slotted, index, buf); 170 | }; 171 | slotted.initialize(); 172 | push(&mut slotted, b"hello"); 173 | push(&mut slotted, b"world"); 174 | assert_eq!(&slotted[0], b"hello"); 175 | assert_eq!(&slotted[1], b"world"); 176 | insert(&mut slotted, 1, b", "); 177 | push(&mut slotted, b"!"); 178 | assert_eq!(&slotted[0], b"hello"); 179 | assert_eq!(&slotted[1], b", "); 180 | assert_eq!(&slotted[2], b"world"); 181 | assert_eq!(&slotted[3], b"!"); 182 | } 183 | } 184 | --------------------------------------------------------------------------------