├── .editorconfig ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .hooky └── pre-commit ├── CONTRIBUTING.md ├── Cargo.toml ├── LICENSE ├── README.md ├── src ├── btree │ ├── mod.rs │ ├── node.rs │ └── spec.rs ├── build.rs ├── collection │ ├── mod.rs │ ├── strategies │ │ ├── branch.rs │ │ ├── lock.rs │ │ └── mod.rs │ ├── xact.rs │ └── xlog.rs ├── config.rs ├── error.rs ├── lib.rs ├── page.rs ├── page │ ├── io.rs │ ├── layout.rs │ └── spec.rs └── serializer.rs └── tests ├── collection_tests.rs └── run-tests.sh /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig is awesome: https://EditorConfig.org 2 | 3 | # top-most EditorConfig file 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | indent_size = 4 9 | end_of_line = lf 10 | charset = utf-8 11 | trim_trailing_whitespace = false 12 | insert_final_newline = false -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Integration (CI) 2 | 3 | on: 4 | push: 5 | pull_request: 6 | types: 7 | - reopened 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | test: 14 | runs-on: ubuntu-latest 15 | name: Test the application 16 | steps: 17 | - uses: actions/checkout@v3 18 | - name: Build the application 19 | run: cargo build --verbose 20 | - name: Run tests 21 | run: cargo test --verbose 22 | lint: 23 | runs-on: ubuntu-latest 24 | name: Lint the application 25 | steps: 26 | - name: Checkout repository 27 | uses: actions/checkout@v4 28 | - name: Install Clippy 29 | run: rustup component add clippy 30 | - name: Run linter 31 | run: cargo clippy --verbose --all-targets --all-features 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | 
4 | # MacOS 5 | .DS_Store 6 | 7 | # Test folders 8 | /test_data 9 | /example 10 | -------------------------------------------------------------------------------- /.hooky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Run pre-commit hooks 3 | 4 | set -o errexit 5 | set -o nounset 6 | set -o pipefail 7 | 8 | . ./tests/run-tests.sh 9 | 10 | exit 0 11 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # 🔗 How to contribute 2 | Every help is welcome! See how to contribute below 3 | 4 | # Starting 5 | 1. Fork this project on Github 6 | 2. Make a clone of the created fork repository: `git clone https://github.com/youruser/dustdata.git` 7 | 3. Create a branch to commit your feature or fix: `git checkout -b my-branch` 8 | 4. And make your changes! 9 | 10 | ## Unit tests 11 | If are you creating a new feature, make sure to create a unit tests to it. To make a unit tests, add the following code in the same file of you feature: 12 | ```rust 13 | // Unit tests 14 | #[cfg(test)] 15 | mod your_feature_tests { 16 | #[test] 17 | fn testing_feature() { 18 | ... 19 | } 20 | } 21 | ``` 22 | 23 | ### Running unit tests 24 | See more about unit tests [here](https://doc.rust-lang.org/rust-by-example/testing/unit_testing.html) 25 | 26 | ## Commit messages 27 | We suggest that commit messages follow the *conventional commit*. 28 | 29 | See about *conventional commit* [here](https://www.conventionalcommits.org/en/v1.0.0/) 30 | 31 | # When you're done, make your Pull Request! 
32 | * Commit your changes 33 | * Push your branch to your fork: `git push origin my-branch` 34 | * Go to Pull Requests from the root repository and create a [Pull Request](https://github.com/rustbase/rustbase/pulls) with your commit(s) -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "dustdata" 3 | version = "2.0.0-beta.6" 4 | edition = "2021" 5 | description = "A data concurrency control storage engine to Rustbase" 6 | repository = "https://github.com/rustbase/dustdata" 7 | homepage = "https://github.com/rustbase/dustdata" 8 | authors = ["Rustbase"] 9 | license = "MIT" 10 | exclude = [ 11 | "test_data/*", 12 | "README.md", 13 | "CONTRIBUTING.md", 14 | ".editorconfig", 15 | ".cargo/*", 16 | ] 17 | build = "./src/build.rs" 18 | 19 | [lib] 20 | doctest = false 21 | 22 | [dependencies] 23 | flate2 = "1.0" 24 | serde = { version = "1.0", features = ["derive"] } 25 | bloomfilter = "1.0" 26 | fs2 = "0.4.3" 27 | bincode = "1.3.3" 28 | crc32fast = "1.4.2" 29 | glob = "0.3.1" 30 | 31 | [build-dependencies] 32 | hooky-rs = "1.0.0" 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2024 Rustbase 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial 
portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![crates.io](https://img.shields.io/crates/v/dustdata?color=EA4342&style=flat-square)](https://crates.io/crates/dustdata) 2 | [![docs.rs](https://img.shields.io/docsrs/dustdata?style=flat-square)](https://docs.rs/dustdata) 3 | 4 | 5 | # DustData 6 | A data concurrency control storage engine to [Rustbase](https://github.com/rustbase/rustbase) 7 | 8 | Join our [community](https://discord.gg/m5ZzWPumbd) and [chat](https://discord.gg/m5ZzWPumbd) with other Rust users. 9 | 10 | # ⚠️ Warning 11 | This is a work in progress. The API is not stable yet. 
12 | 13 | # 🔗 Contribute 14 | [Click here](./CONTRIBUTING.md) to see how to Contribute 15 | 16 | # How to install 17 | Add the following to your `Cargo.toml`: 18 | 19 | ```toml 20 | [dependencies] 21 | dustdata = "2.0.0-beta.6" 22 | ``` 23 | 24 | # Usage 25 | Initialize a new `DustData` instance with the default configuration: 26 | ```rust 27 | use dustdata::DustData; 28 | 29 | let mut dustdata = DustData::new(Default::default()).unwrap(); 30 | ``` 31 | 32 | ## Inserting data into a collection 33 | 34 | ```rust 35 | #[derive(Serialize, Deserialize, Clone, Debug)] 36 | struct User { 37 | name: String, 38 | age: u32, 39 | } 40 | 41 | let collection = dustdata.collection::("users"); 42 | 43 | let user = User { 44 | name: "Pedro".to_string(), 45 | age: 21, 46 | }; 47 | 48 | // Creating a new transaction. 49 | let mut transaction = collection.start_branch(); 50 | 51 | // Inserting the user into the transaction. 52 | transaction.insert("user:1", user); 53 | 54 | // Committing the transaction. 55 | collection.commit(transaction).unwrap(); 56 | 57 | // Done! 58 | ``` 59 | 60 | ## Reading data from a collection 61 | 62 | ```rust 63 | let collection = dustdata.collection::("users").unwrap(); 64 | 65 | let user = collection.get("user:1").unwrap(); 66 | ``` 67 | 68 | 69 | # Authors 70 | 71 |
72 | 73 | | [
@peeeuzin](https://github.com/peeeuzin) | 74 | | :-------------------------------------------------------------------------------------------------------------------: | 75 | 76 |
77 | 78 | # License 79 | 80 | [MIT License](./LICENSE) 81 | -------------------------------------------------------------------------------- /src/btree/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | error::{Error, Result}, 3 | page::{ 4 | io::BlockIO, 5 | spec::{LocationOffset, PageNumber}, 6 | Page, 7 | }, 8 | Either, 9 | }; 10 | use glob::Pattern; 11 | use node::BTreeNode; 12 | use serde::{de::DeserializeOwned, Serialize}; 13 | use std::{ 14 | cmp::{self, Ordering}, 15 | fmt::Debug, 16 | marker::PhantomData, 17 | ops::{Bound, RangeBounds}, 18 | }; 19 | 20 | pub mod node; 21 | pub mod spec; 22 | use spec::{ 23 | BTreeBlockHeader, BTreeCell, BTreePageHeader, BTreePair, PageType, BTREE_PAGE_HEADER_SIZE, 24 | }; 25 | 26 | pub const MAX_BRANCHING_FACTOR: u16 = 100; 27 | 28 | pub trait KeyTrait: Serialize + DeserializeOwned + PartialOrd + Ord + Clone + Debug {} 29 | impl KeyTrait for T {} 30 | 31 | pub trait ValueTrait: Serialize + DeserializeOwned + PartialOrd + Clone + Ord + Debug {} 32 | impl ValueTrait for T {} 33 | 34 | #[derive(Debug)] 35 | pub struct Search { 36 | /// The page number where the search ended 37 | pub page: PageNumber, 38 | 39 | /// The index where the search ended 40 | pub index: Either, 41 | } 42 | 43 | impl Search { 44 | pub fn next_cell(&self, btree: &BTree) -> Result 45 | where 46 | K: KeyTrait, 47 | V: ValueTrait, 48 | { 49 | let mut page = self.page; 50 | let cell; 51 | 52 | match self.index { 53 | Either::Left(index) => cell = Either::Left(index + 1), 54 | Either::Right(_) => { 55 | page = self.page + 1; 56 | cell = Either::Left(0); 57 | } 58 | }; 59 | 60 | if btree.io.page_exists(page).map_err(Error::IoError)? 
{ 61 | Ok(Self { index: cell, page }) 62 | } else { 63 | Ok(Self { 64 | index: cell, 65 | page: self.page, 66 | }) 67 | } 68 | } 69 | } 70 | 71 | pub struct BTree { 72 | root: PageNumber, 73 | b: u16, 74 | pub io: BlockIO, 75 | 76 | _k: PhantomData, 77 | _v: PhantomData, 78 | } 79 | 80 | impl BTree { 81 | pub fn new(mut io: BlockIO) -> Result { 82 | let mut metadata_page: Page = 83 | io.read_metadata_page().map_err(Error::IoError)?; 84 | 85 | match metadata_page.is_empty() { 86 | false => { 87 | let block_header = metadata_page.read(0)?.unwrap(); 88 | 89 | Ok(Self { 90 | io, 91 | root: block_header.root, 92 | b: MAX_BRANCHING_FACTOR, 93 | _k: PhantomData, 94 | _v: PhantomData, 95 | }) 96 | } 97 | 98 | true => Self::create_block(io), 99 | } 100 | } 101 | 102 | fn create_block(mut io: BlockIO) -> Result { 103 | let mut root: Page> = Page::create(BTREE_PAGE_HEADER_SIZE).unwrap(); 104 | let metadata = BTreePageHeader::new(PageType::Leaf, None); 105 | root.write_special(&metadata.to_bytes()).unwrap(); 106 | 107 | let root_page = io.write_new_page(&root).unwrap(); 108 | 109 | let block_header = BTreeBlockHeader { root: root_page }; 110 | 111 | let mut metadata_page: Page = 112 | io.read_metadata_page().map_err(Error::IoError)?; 113 | 114 | metadata_page.insert(0, block_header)?; 115 | 116 | io.write_metadata_page(&metadata_page).unwrap(); 117 | 118 | Ok(Self { 119 | io, 120 | root: root_page, 121 | b: MAX_BRANCHING_FACTOR, 122 | _k: PhantomData, 123 | _v: PhantomData, 124 | }) 125 | } 126 | 127 | pub fn search(&mut self, key: &K) -> Result { 128 | self.search_from_subtree(key, self.root, &mut Vec::new()) 129 | } 130 | 131 | fn search_from_subtree( 132 | &mut self, 133 | key: &K, 134 | page_number: PageNumber, 135 | parents: &mut Vec, 136 | ) -> Result { 137 | self.search_from_subtree_by( 138 | |page| page.binary_search_by_key(key, |e| e.key.clone()), 139 | page_number, 140 | parents, 141 | ) 142 | } 143 | 144 | pub fn search_from_subtree_by( 145 | &mut self, 146 | mut f: 
F, 147 | page_number: PageNumber, 148 | parents: &mut Vec, 149 | ) -> Result 150 | where 151 | F: FnMut(&mut Page>) -> Either, 152 | { 153 | let mut node: BTreeNode = self 154 | .io 155 | .read_page(page_number.into()) 156 | .map_err(Error::IoError)? 157 | .try_into()?; 158 | 159 | match node.header.kind { 160 | PageType::Internal | PageType::Root => { 161 | parents.push(page_number); 162 | let index = f(&mut node.page); 163 | 164 | if index.is_left() { 165 | return Ok(Search { 166 | index, 167 | page: page_number, 168 | }); 169 | } 170 | 171 | let next_cell = *index.right().unwrap(); 172 | 173 | let next_page = node.child(next_cell)?.unwrap(); 174 | 175 | self.search_from_subtree_by(f, next_page, parents) 176 | } 177 | 178 | PageType::Leaf => { 179 | let index = f(&mut node.page); 180 | 181 | Ok(Search { 182 | index, 183 | page: page_number, 184 | }) 185 | } 186 | } 187 | } 188 | 189 | pub fn get(&mut self, key: &K) -> Result> { 190 | let search = self.search_from_subtree(key, self.root, &mut Vec::new())?; 191 | 192 | if let Either::Left(index) = search.index { 193 | let mut page: Page> = self 194 | .io 195 | .read_page(search.page.into()) 196 | .map_err(Error::IoError)?; 197 | 198 | let cell = page.read(index)?; 199 | 200 | Ok(cell.map(|c| c.value.unwrap())) 201 | } else { 202 | Ok(None) 203 | } 204 | } 205 | 206 | pub fn insert(&mut self, key: K, value: V) -> Result<()> { 207 | let root: BTreeNode = self 208 | .io 209 | .read_page(self.root.into()) 210 | .map_err(Error::IoError)? 
211 | .try_into()?; 212 | 213 | if root.is_full(self.b) { 214 | let mut splited = root.split(self.b)?; 215 | 216 | let sibling_page_number = self 217 | .io 218 | .write_new_page(&splited.sibling_node.page) 219 | .map_err(Error::IoError)?; 220 | 221 | // create new root 222 | let mut new_root = Page::>::create(BTREE_PAGE_HEADER_SIZE)?; 223 | // set sibling page index to new root right child 224 | let new_root_metadata = BTreePageHeader::new(PageType::Root, Some(sibling_page_number)); 225 | new_root.write_special(&new_root_metadata.to_bytes())?; 226 | 227 | splited.median_cell.left_child = Some(self.root); 228 | // write median cell to new root page 229 | new_root.write(splited.median_cell)?; 230 | 231 | // write old root to disk 232 | self.io 233 | .write_page(self.root.into(), &splited.node.page) 234 | .map_err(Error::IoError)?; 235 | 236 | // write new root to disk and set new root index 237 | let root_page = self.io.write_new_page(&new_root).map_err(Error::IoError)?; 238 | self.set_root(root_page)?; 239 | } 240 | 241 | self.insert_non_full(self.root, key, value) 242 | } 243 | 244 | fn insert_non_full(&mut self, page_number: PageNumber, key: K, value: V) -> Result<()> { 245 | let mut node: BTreeNode = self 246 | .io 247 | .read_page(page_number.into()) 248 | .map_err(Error::IoError)? 249 | .try_into()?; 250 | 251 | let index = node.page.binary_search_by_key(&key, |e| e.key.clone()); 252 | 253 | let index = match index { 254 | Either::Left(index) => index, 255 | Either::Right(index) => index, 256 | }; 257 | 258 | match node.header.kind { 259 | PageType::Internal | PageType::Root => { 260 | let page_child_page_number = node.child(index)?.unwrap(); 261 | 262 | let page_child: BTreeNode = self 263 | .io 264 | .read_page(page_child_page_number.into()) 265 | .map_err(Error::IoError)? 
266 | .try_into()?; 267 | 268 | if !page_child.is_full(self.b) { 269 | return self.insert_non_full(page_child_page_number, key, value); 270 | } 271 | 272 | let mut splited = page_child.split(self.b)?; 273 | 274 | let sibling_page_number = self 275 | .io 276 | .write_new_page(&splited.sibling_node.page) 277 | .map_err(Error::IoError)?; 278 | 279 | node.set_child(index + 1, sibling_page_number)?; 280 | 281 | splited.median_cell.left_child = Some(page_child_page_number); 282 | 283 | let median_cell_key = splited.median_cell.key.clone(); 284 | node.page.insert(index, splited.median_cell)?; 285 | 286 | self.io 287 | .write_page(page_child_page_number.into(), &splited.node.page) 288 | .map_err(Error::IoError)?; 289 | 290 | self.io 291 | .write_page(page_number.into(), &node.page) 292 | .map_err(Error::IoError)?; 293 | 294 | let insert_page_offset = match key.cmp(&median_cell_key) { 295 | Ordering::Less | Ordering::Equal => page_child_page_number, 296 | Ordering::Greater => sibling_page_number, 297 | }; 298 | 299 | self.insert_non_full(insert_page_offset, key, value) 300 | } 301 | 302 | PageType::Leaf => { 303 | let cell = BTreeCell { 304 | left_child: None, 305 | key, 306 | value: Some(value), 307 | }; 308 | 309 | node.page.insert(index, cell)?; 310 | 311 | self.io 312 | .write_page(page_number.into(), &node.page) 313 | .map_err(Error::IoError)?; 314 | 315 | Ok(()) 316 | } 317 | } 318 | } 319 | 320 | pub fn values(&mut self) -> Result> { 321 | let values = self.pairs_from_subtree(self.root)?.into_iter(); 322 | let values = values.map(|e| e.value.unwrap()).collect::>(); 323 | 324 | Ok(values) 325 | } 326 | 327 | pub fn keys(&mut self) -> Result> { 328 | let keys = self.pairs_from_subtree(self.root)?.into_iter(); 329 | let keys = keys.map(|e| e.key).collect::>(); 330 | 331 | Ok(keys) 332 | } 333 | 334 | pub fn pairs(&mut self) -> Result>> { 335 | self.pairs_from_subtree(self.root) 336 | } 337 | 338 | fn pairs_from_subtree(&mut self, page_number: PageNumber) -> Result>> { 
339 | let mut node: BTreeNode = self 340 | .io 341 | .read_page(page_number.into()) 342 | .map_err(Error::IoError)? 343 | .try_into()?; 344 | 345 | let mut results = Vec::new(); 346 | 347 | for index in 0..node.page.len() { 348 | let child = node.child(index)?; 349 | 350 | if let Some(child) = child { 351 | results.extend(self.pairs_from_subtree(child)?) 352 | } 353 | 354 | results.extend(node.page.read(index)?.map(|m| m.to_pair())); 355 | } 356 | 357 | let right = node.child(node.page.len())?; 358 | if let Some(right) = right { 359 | results.extend(self.pairs_from_subtree(right)?) 360 | } 361 | 362 | Ok(results) 363 | } 364 | 365 | pub fn delete(&mut self, key: &K) -> Result<()> { 366 | self.delete_from_subtree(self.root, key, &mut Vec::new()) 367 | } 368 | 369 | fn delete_from_subtree( 370 | &mut self, 371 | page_number: PageNumber, 372 | key: &K, 373 | parents: &mut Vec, 374 | ) -> Result<()> { 375 | // search key 376 | let search = self.search_from_subtree(key, page_number, parents)?; 377 | 378 | // get searched page node 379 | let mut node: BTreeNode = self 380 | .io 381 | .read_page(search.page.into()) 382 | .map_err(Error::IoError)? 383 | .try_into()?; 384 | 385 | // return if not found 386 | if search.index.is_right() { 387 | return Err(Error::NotFound(format!("key {:?}", key))); 388 | } 389 | 390 | // delete cell 391 | let index = *search.index.left().unwrap(); 392 | node.page.delete(index)?; 393 | let page = node.page.compact()?; 394 | 395 | self.io 396 | .write_page(search.page.into(), &page) 397 | .map_err(Error::IoError)?; 398 | 399 | // check for underflow 400 | self.borrow_if_needed(search.page, parents, key) 401 | } 402 | 403 | fn borrow_if_needed( 404 | &mut self, 405 | page_number: PageNumber, 406 | parents: &mut Vec, 407 | key: &K, 408 | ) -> Result<()> { 409 | let node: BTreeNode = self 410 | .io 411 | .read_page(page_number.into()) 412 | .map_err(Error::IoError)? 
413 | .try_into()?; 414 | 415 | if !node.is_underflow(self.b) || parents.is_empty() { 416 | return Ok(()); 417 | } 418 | 419 | let parent_index = parents.pop().unwrap(); 420 | 421 | let mut parent: BTreeNode = self 422 | .io 423 | .read_page(parent_index.into()) 424 | .map_err(Error::IoError)? 425 | .try_into()?; 426 | 427 | let index = parent.page.binary_search_by_key(key, |e| e.key.clone()); 428 | 429 | let index = match index { 430 | Either::Left(index) => index, 431 | Either::Right(index) => index, 432 | }; 433 | let sibling_index = match index > 0 { 434 | false => index + 1, 435 | true => index - 1, 436 | }; 437 | 438 | let sibling_page_number = parent.child(sibling_index)?.unwrap(); 439 | 440 | let sibling: BTreeNode = self 441 | .io 442 | .read_page(sibling_page_number.into()) 443 | .map_err(Error::IoError)? 444 | .try_into()?; 445 | 446 | let merged_node_idx = cmp::min(index, sibling_index); 447 | 448 | let cell = parent.page.read(merged_node_idx)?.unwrap(); 449 | parent.page.delete(merged_node_idx)?; 450 | let mut parent: BTreeNode = parent.page.compact()?.try_into()?; 451 | 452 | let mut merged_node = node.merge(sibling, cell)?; 453 | 454 | let merged_page_number = self 455 | .io 456 | .write_new_page(&merged_node.page) 457 | .map_err(Error::IoError)?; 458 | 459 | if parent.is_root() && parent.page.is_empty() { 460 | merged_node.header.kind = PageType::Root; 461 | merged_node 462 | .page 463 | .write_special(&merged_node.header.to_bytes())?; 464 | 465 | self.io 466 | .write_page(merged_page_number.into(), &merged_node.page) 467 | .map_err(Error::IoError)?; 468 | 469 | self.set_root(merged_page_number)?; 470 | } else { 471 | parent.set_child(merged_node_idx, merged_page_number)?; 472 | } 473 | 474 | self.io 475 | .write_page(parent_index.into(), &parent.page) 476 | .map_err(Error::IoError)?; 477 | 478 | if let Some(parent) = parents.pop() { 479 | self.borrow_if_needed(parent, parents, key)?; 480 | } 481 | 482 | Ok(()) 483 | } 484 | 485 | fn set_root(&mut 
self, page_number: PageNumber) -> Result<()> { 486 | self.root = page_number; 487 | 488 | let block_header = BTreeBlockHeader { root: self.root }; 489 | 490 | let mut metadata_page: Page = 491 | self.io.read_metadata_page().map_err(Error::IoError)?; 492 | 493 | metadata_page.insert(0, block_header)?; 494 | 495 | self.io.write_metadata_page(&metadata_page).unwrap(); 496 | 497 | Ok(()) 498 | } 499 | 500 | pub fn iter(&mut self) -> BTreeIterator<'_, K, V> { 501 | BTreeIterator::new(self, None, None) 502 | } 503 | 504 | pub fn len(&mut self) -> Result { 505 | Ok(self.pairs_from_subtree(self.root)?.len()) 506 | } 507 | 508 | pub fn is_empty(&mut self) -> Result { 509 | Ok(self.len()? == 0) 510 | } 511 | 512 | pub fn contains(&mut self, key: &K) -> Result { 513 | Ok(self.get(key)?.is_some()) 514 | } 515 | 516 | pub fn sync(&self) -> Result<()> { 517 | self.io.sync().map_err(Error::IoError) 518 | } 519 | 520 | pub fn range(&mut self, range: R) -> Result> 521 | where 522 | R: RangeBounds, 523 | { 524 | let start_index = match range.start_bound() { 525 | Bound::Unbounded => None, 526 | Bound::Included(key) => Some(self.search(key)?), 527 | 528 | _ => unimplemented!(), 529 | }; 530 | 531 | let end_index = match range.end_bound() { 532 | Bound::Unbounded => None, 533 | Bound::Excluded(key) => Some(self.search(key)?), 534 | Bound::Included(key) => { 535 | let search = self.search(key)?.next_cell(self)?; 536 | 537 | Some(search) 538 | } 539 | }; 540 | 541 | let start_pos = start_index.map(|s| { 542 | let index = match s.index { 543 | Either::Left(index) => index, 544 | Either::Right(index) => index, 545 | }; 546 | 547 | BTreeIteratorPosition { 548 | index, 549 | page: s.page, 550 | } 551 | }); 552 | 553 | let end_pos = end_index.map(|s| { 554 | let index = match s.index { 555 | Either::Left(index) => index, 556 | Either::Right(index) => index, 557 | }; 558 | 559 | BTreeIteratorPosition { 560 | index, 561 | page: s.page, 562 | } 563 | }); 564 | 565 | Ok(BTreeIterator::new(self, 
start_pos, end_pos)) 566 | } 567 | } 568 | 569 | #[derive(Debug)] 570 | pub struct BTreeIteratorPosition { 571 | page: PageNumber, 572 | index: LocationOffset, 573 | } 574 | 575 | pub struct BTreeIterator<'b, K, V> { 576 | btree: &'b mut BTree, 577 | parents: Vec, 578 | pos: BTreeIteratorPosition, 579 | end_at: Option, 580 | done: bool, 581 | } 582 | 583 | impl<'b, K: KeyTrait, V: ValueTrait> BTreeIterator<'b, K, V> { 584 | pub fn new( 585 | btree: &'b mut BTree, 586 | start_at: Option, 587 | end_at: Option, 588 | ) -> Self { 589 | let default_pos = BTreeIteratorPosition { 590 | index: 0, 591 | page: btree.root, 592 | }; 593 | 594 | let mut iter = Self { 595 | btree, 596 | pos: default_pos, 597 | parents: Vec::new(), 598 | end_at, 599 | done: false, 600 | }; 601 | 602 | iter.move_to_leftmost().unwrap(); 603 | 604 | if let Some(pos) = start_at { 605 | iter.pos = pos; 606 | } 607 | 608 | iter 609 | } 610 | 611 | fn move_to_leftmost(&mut self) -> Result<()> { 612 | let mut node: BTreeNode = self 613 | .btree 614 | .io 615 | .read_page(self.pos.page.into()) 616 | .map_err(Error::IoError)? 617 | .try_into()?; 618 | 619 | while !node.is_leaf() { 620 | self.parents.push(self.pos.page); 621 | 622 | self.pos.page = node.child(0)?.unwrap(); 623 | 624 | let next_node: BTreeNode = self 625 | .btree 626 | .io 627 | .read_page(self.pos.page.into()) 628 | .map_err(Error::IoError)? 629 | .try_into()?; 630 | 631 | node = next_node 632 | } 633 | 634 | self.pos.index = 0; 635 | 636 | Ok(()) 637 | } 638 | 639 | fn move_to_rightmost(&mut self) -> Result<()> { 640 | let mut node: BTreeNode = self 641 | .btree 642 | .io 643 | .read_page(self.pos.page.into()) 644 | .map_err(Error::IoError)? 
645 | .try_into()?; 646 | 647 | while !node.is_leaf() { 648 | self.parents.push(self.pos.page); 649 | 650 | let len = node.len(); 651 | self.pos.page = node.child(len)?.unwrap(); 652 | 653 | let next_node: BTreeNode = self 654 | .btree 655 | .io 656 | .read_page(self.pos.page.into()) 657 | .map_err(Error::IoError)? 658 | .try_into()?; 659 | 660 | node = next_node 661 | } 662 | 663 | self.pos.index = node.len() - 1; 664 | 665 | Ok(()) 666 | } 667 | } 668 | 669 | impl Iterator for BTreeIterator<'_, K, V> { 670 | type Item = BTreePair; 671 | 672 | fn next(&mut self) -> Option { 673 | if self.done { 674 | return None; 675 | } 676 | 677 | let mut node: BTreeNode = self 678 | .btree 679 | .io 680 | .read_page(self.pos.page.into()) 681 | .unwrap() 682 | .try_into() 683 | .unwrap(); 684 | 685 | if node.is_empty() && node.is_leaf() { 686 | self.done = true; 687 | return None; 688 | } 689 | 690 | if let Some(end_at) = &self.end_at { 691 | if end_at.index == self.pos.index && end_at.page == self.pos.page { 692 | self.done = true; 693 | return None; 694 | } 695 | } 696 | 697 | let cell = node.page.read(self.pos.index).unwrap(); 698 | 699 | if node.is_leaf() && self.pos.index + 1 < node.len() { 700 | self.pos.index += 1; 701 | return Some(cell.unwrap().to_pair()); 702 | } 703 | 704 | if !node.is_leaf() && self.pos.index < node.len() { 705 | self.parents.push(self.pos.page); 706 | self.pos.page = node.child(self.pos.index + 1).unwrap().unwrap(); 707 | self.move_to_leftmost().unwrap(); 708 | 709 | return Some(cell.unwrap().to_pair()); 710 | } 711 | 712 | let mut found_branch = false; 713 | 714 | while !self.parents.is_empty() && !found_branch { 715 | let parent_page = self.parents.pop().unwrap(); 716 | let mut parent: BTreeNode = self 717 | .btree 718 | .io 719 | .read_page(parent_page.into()) 720 | .unwrap() 721 | .try_into() 722 | .unwrap(); 723 | 724 | let index = parent 725 | .iter_children() 726 | .position(|c| c == self.pos.page) 727 | .unwrap() as u16; 728 | 729 | 
self.pos.page = parent_page; 730 | 731 | if index < parent.len() { 732 | self.pos.index = index; 733 | found_branch = true; 734 | } 735 | } 736 | 737 | if self.parents.is_empty() && !found_branch { 738 | self.done = true; 739 | } 740 | 741 | Some(cell.unwrap().to_pair()) 742 | } 743 | 744 | fn last(mut self) -> Option 745 | where 746 | Self: Sized, 747 | { 748 | self.move_to_rightmost().unwrap(); 749 | 750 | let mut node: BTreeNode = self 751 | .btree 752 | .io 753 | .read_page(self.pos.page.into()) 754 | .unwrap() 755 | .try_into() 756 | .unwrap(); 757 | 758 | let cell = node.page.read(self.pos.index).unwrap(); 759 | 760 | self.done = true; 761 | 762 | Some(cell.unwrap().to_pair()) 763 | } 764 | } 765 | 766 | impl BTree { 767 | /// O(n) worst-case complexity 768 | pub fn find_pattern<'a>( 769 | &'a mut self, 770 | key_pattern: &'a str, 771 | ) -> impl Iterator> + 'a { 772 | let iter = self.iter(); 773 | 774 | iter.filter_map(|c| { 775 | if Pattern::new(key_pattern).unwrap().matches(&c.key) { 776 | Some(c) 777 | } else { 778 | None 779 | } 780 | }) 781 | } 782 | } 783 | 784 | #[cfg(test)] 785 | mod btree_tests { 786 | use super::*; 787 | 788 | #[test] 789 | fn create_btree() { 790 | let io = BlockIO::new("test_data/btree.db").unwrap(); 791 | let mut btree = BTree::::new(io).unwrap(); 792 | 793 | for i in 0u32..=100 { 794 | btree.insert(i, i * 2).unwrap(); 795 | } 796 | 797 | let value = btree.get(&2).unwrap().unwrap(); 798 | assert_eq!(value, 4); 799 | 800 | let value = btree.get(&10).unwrap().unwrap(); 801 | assert_eq!(value, 20); 802 | 803 | let value = btree.get(&50).unwrap().unwrap(); 804 | assert_eq!(value, 100); 805 | 806 | let value = btree.get(&75).unwrap().unwrap(); 807 | assert_eq!(value, 150); 808 | 809 | let value = btree.get(&99).unwrap().unwrap(); 810 | assert_eq!(value, 198); 811 | 812 | let length = btree.len().unwrap(); 813 | assert_eq!(length, 101); 814 | 815 | for (index, pair) in btree.range(..).unwrap().enumerate() { 816 | 
            assert_eq!(pair.value.unwrap(), index as u32 * 2);
        }
    }
}

--------------------------------------------------------------------------------
/src/btree/node.rs:
--------------------------------------------------------------------------------
// In-memory view of a single B-tree page: the raw slotted page plus its
// decoded B-tree header (page kind and right-child pointer).
//
// NOTE(review): generic parameter lists appear to have been lost in extraction
// (e.g. `Page>`, `Result>`, `BTreeCell,`); tokens are preserved as found.
use crate::{
    error::{Error, Result},
    page::{spec::PageNumber, Page},
};
use serde::{de::DeserializeOwned, Serialize};

use super::{
    spec::{BTreeCell, BTreePageHeader, PageType, BTREE_PAGE_HEADER_SIZE},
    KeyTrait, ValueTrait,
};

pub struct BTreeNode {
    // Underlying slotted page holding the node's cells.
    pub page: Page>,
    // Decoded copy of the page's special-area header (kind + right child).
    pub header: BTreePageHeader,
}

// Result of splitting a full node: the (compacted) left node, the median
// cell that must be pushed into the parent, and the new right sibling.
pub struct BTreeNodeSplited {
    pub node: BTreeNode,
    pub median_cell: BTreeCell,
    pub sibling_node: BTreeNode,
}

impl BTreeNode {
    /// True when the cell count has reached capacity for this page kind:
    /// 2b-1 cells for internal/root pages, 2b for leaves.
    pub fn is_full(&self, b: u16) -> bool {
        match self.header.kind {
            PageType::Internal | PageType::Root => self.page.len() == (b * 2 - 1),
            PageType::Leaf => self.page.len() == (b * 2),
        }
    }

    /// True when the node holds fewer than b-1 cells. The root never
    /// underflows.
    pub fn is_underflow(&self, b: u16) -> bool {
        match self.header.kind {
            PageType::Root => false,
            PageType::Internal | PageType::Leaf => self.page.len() < b - 1,
        }
    }

    pub fn is_root(&self) -> bool {
        self.header.kind == PageType::Root
    }

    pub fn is_leaf(&self) -> bool {
        self.header.kind == PageType::Leaf
    }

    /// Number of cells currently stored in the page.
    pub fn len(&mut self) -> u16 {
        self.page.len()
    }

    pub fn is_empty(&mut self) -> bool {
        self.page.is_empty()
    }

    /// Splits this node around the median cell (index b-1).
    ///
    /// Cells [b-1..] are split off; the first of them becomes the median and
    /// the rest go to a newly created sibling page. Root/internal pages
    /// produce an Internal sibling that inherits this node's right child;
    /// leaf siblings stay Leaf with no right child. This node's header is
    /// rewritten so its right child becomes the median's former left child.
    ///
    /// NOTE(review): after splitting a Root, this node's header keeps kind
    /// `page_type` (Internal) — presumably the caller creates the new root.
    pub fn split(mut self, b: u16) -> Result> {
        // split page cells at the middle
        let mut sibling_cells = self.page.split_off(b - 1)?;
        // get the median cell and remove from sibling_cells
        let median_cell = sibling_cells.remove(0);

        // garbage collect page (reclaim the space of the removed cells)
        self.page = self.page.compact()?;

        // create sibling page
        let mut sibling = Page::>::create(BTREE_PAGE_HEADER_SIZE)?;

        let (page_type, sibling_page_right_child) = match self.header.kind {
            PageType::Root | PageType::Internal => (PageType::Internal, self.header.right_child),
            PageType::Leaf => (PageType::Leaf, None),
        };

        // setting sibling page metadata
        let sibling_metadata = BTreePageHeader::new(page_type, sibling_page_right_child);
        sibling.write_special(&sibling_metadata.to_bytes())?;

        // write all sibling cells
        sibling.write_all(sibling_cells)?;

        // this node's new right child is the median cell's left child
        let page_metadata = BTreePageHeader::new(page_type, median_cell.left_child);
        self.page.write_special(&page_metadata.to_bytes())?;

        Ok(BTreeNodeSplited {
            node: self,
            median_cell,
            sibling_node: sibling.try_into()?,
        })
    }

    /// Read child index by cell index.
    ///
    /// Leaves have no children (None). `index >= len` selects the node's
    /// right child; otherwise the cell's left child is returned.
    /// NOTE(review): `.unwrap()` assumes `index` addresses a live cell.
    pub fn child(&mut self, index: u16) -> Result> {
        if self.is_leaf() {
            return Ok(None);
        }

        if index >= self.page.len() {
            Ok(self.header.right_child)
        } else {
            Ok(self.page.read(index)?.unwrap().left_child)
        }
    }

    /// Set child cell index with index.
    ///
    /// `index >= len` updates the right-child pointer (and persists the
    /// header); otherwise the addressed cell's left child is rewritten.
    pub fn set_child(&mut self, index: u16, child_index: PageNumber) -> Result<()> {
        if index >= self.page.len() {
            self.header.right_child = Some(child_index);
            self.page.write_special(&self.header.to_bytes())?;
        } else {
            let mut cell = self.page.read(index)?.unwrap();
            cell.left_child = Some(child_index);
            self.page.replace(index, cell)?;
        }

        Ok(())
    }

    /// Key of the last cell in the page, or None when empty.
    pub fn max_key(&mut self) -> Result> {
        if self.page.is_empty() {
            return Ok(None);
        }

        Ok(self.page.read(self.page.len() - 1)?.map(|e| e.key))
    }

    /// Key of the first cell in the page, or None when empty.
    pub fn min_key(&mut self) -> Result> {
        if self.page.is_empty() {
            return Ok(None);
        }

        Ok(self.page.read(0)?.map(|e| e.key))
    }

    /// Merges this node with a sibling plus the separator `cell` pulled
    /// down from the parent, producing a single node containing all cells
    /// sorted by key. The separator's left child is re-pointed depending on
    /// which side the sibling is on.
    pub fn merge(mut self, mut other: Self, mut cell: BTreeCell) -> Result {
        let new_page_header = BTreePageHeader::new(self.header.kind, None);
        match self.min_key()? >= other.min_key()? {
            // merge with left sibling node
            true => {
                cell.left_child = other.header.right_child;
            }
            // merge with right sibling node
            false => {
                cell.left_child = self.header.right_child;
                self.header.right_child = other.header.right_child
            }
        };

        let node_values = self.page.values()?.into_iter();
        let other_values = other.page.values()?.into_iter();

        let mut merged_values = node_values
            .chain(other_values)
            .collect::>>();

        merged_values.push(cell);

        merged_values.sort_by(|a, b| a.key.cmp(&b.key));

        // Rebuild a fresh page from the merged, sorted cells.
        let mut page: Page> = Page::create(self.page.special_size())?;
        page.write_all(merged_values)?;
        page.write_special(&new_page_header.to_bytes())?;

        page.try_into()
    }

    /// Iterates over this node's child page numbers (len+1 children for a
    /// non-leaf node, nothing for a leaf).
    /// NOTE(review): panics (double unwrap) if any child read fails.
    pub fn iter_children(&mut self) -> impl DoubleEndedIterator + '_ {
        let len = if self.is_leaf() { 0 } else { self.len() + 1 };

        (0..len).map(|i| self.child(i).unwrap().unwrap())
    }
}

// Decode a raw page into a BTreeNode by reading the B-tree header out of the
// page's special area.
impl<
        K: Serialize + DeserializeOwned + PartialOrd + Ord + Clone,
        V: Serialize + DeserializeOwned + PartialOrd + Ord + Clone,
    > TryInto> for Page>
{
    type Error = Error;
    fn try_into(mut self) -> Result> {
        let special_bytes = &mut self.read_special()?;
        let header = BTreePageHeader::from_bytes(special_bytes);

        Ok(BTreeNode { header, page: self })
    }
}

--------------------------------------------------------------------------------
/src/btree/spec.rs:
--------------------------------------------------------------------------------
// On-disk record types shared by the B-tree implementation.
use serde::{Deserialize, Serialize};

use crate::page::spec::PageNumber;

// Header of a B-tree block: remembers which page is the root.
#[derive(Clone, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize, Debug)]
pub struct BTreeBlockHeader {
    pub root: PageNumber,
}

#[derive(Clone, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize, Debug)] 11 | pub struct BTreeCell { 12 | pub left_child: Option, 13 | pub key: K, 14 | pub value: Option, 15 | } 16 | 17 | #[derive(Clone, Eq, Ord, PartialEq, PartialOrd, Serialize, Deserialize, Debug)] 18 | pub struct BTreePair { 19 | pub key: K, 20 | pub value: Option, 21 | } 22 | 23 | impl BTreeCell { 24 | pub fn to_pair(self) -> BTreePair { 25 | BTreePair { 26 | key: self.key, 27 | value: self.value, 28 | } 29 | } 30 | } 31 | 32 | pub const BTREE_PAGE_HEADER_SIZE: u16 = 9; 33 | 34 | #[derive(Serialize, Deserialize, Debug, PartialEq, PartialOrd)] 35 | pub struct BTreePageHeader { 36 | pub kind: PageType, 37 | pub right_child: Option, 38 | } 39 | 40 | #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Copy, Clone, PartialOrd, Ord)] 41 | pub enum PageType { 42 | Leaf = 0, 43 | Internal = 1, 44 | Root = 2, 45 | } 46 | 47 | impl BTreePageHeader { 48 | pub fn new(kind: PageType, right_child: Option) -> Self { 49 | Self { kind, right_child } 50 | } 51 | 52 | pub fn from_bytes(bytes: &[u8]) -> Self { 53 | bincode::deserialize(bytes).unwrap() 54 | } 55 | 56 | pub fn to_bytes(&self) -> Vec { 57 | bincode::serialize(self).unwrap() 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/build.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | if option_env!("CARGO_PRIMARY_PACKAGE").is_some() { 3 | hooky::init(true) 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /src/collection/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod strategies; 2 | pub mod xact; 3 | pub mod xlog; 4 | 5 | use crate::btree::{BTree, ValueTrait}; 6 | use crate::dustdata_config; 7 | use crate::error::{Error, Result}; 8 | use crate::page::io::BlockIO; 9 | use std::fs; 10 | use std::sync::Mutex; 11 | use std::{fmt::Debug, 
sync::Arc};
use xact::TransactionBuilder;
use xlog::{XLog, XLogOperation};

pub use xact::Transaction;

/// Lifecycle states of a transaction.
#[derive(Debug, Clone)]
pub enum TransactionStatus {
    Active,
    Committed,
    RolledBack,
    Aborted,
}

/// Presence of this file marks the collection as owned by a live process.
pub const COLLECTION_LOCK_FILE: &str = ".lock";

/// Collection is a key-value store backed by a B-tree plus a write-ahead log.
pub struct Collection {
    btree: Arc>>,
    xlog: Arc>>,
    name: String,
}

impl Collection {
    /// Opens (or creates) the collection directory, acquires the on-disk
    /// lock file, and opens the xlog and B-tree data files.
    ///
    /// Returns `Error::DatabaseLocked` when the lock file already exists
    /// (i.e. another instance owns this collection).
    pub fn new(name: &str) -> Result {
        let dustdata_config = dustdata_config();
        let base_path = dustdata_config.data_path.join(name);

        fs::create_dir_all(&base_path).ok();

        // Create lock file; `create_new` fails if it already exists.
        fs::File::create_new(base_path.join(COLLECTION_LOCK_FILE))
            .map_err(|_| Error::DatabaseLocked)?;

        let xlog = Arc::new(Mutex::new(xlog::XLog::new(&base_path)?));

        let btree_block = BlockIO::new(base_path.join("Data.db")).map_err(Error::IoError)?;
        let btree = Arc::new(Mutex::new(BTree::new(btree_block)?));

        Ok(Self {
            btree,
            xlog,
            name: name.to_string(),
        })
    }

    /// Starts a new branch transaction.
    pub fn branch_start(&self) -> Result + '_> {
        let dustdata_config = dustdata_config();
        let base_path = dustdata_config.data_path.join(self.name.clone());

        TransactionBuilder::branch(base_path, self.name.clone(), self.btree.lock().unwrap())
    }

    /// Starts a new transaction with a lock.
    ///
    /// Returns `Error::Deadlock` when the B-tree lock is already held
    /// instead of panicking (the previous `try_lock().unwrap()` turned
    /// ordinary contention into a panic).
    pub fn lock_start(&self) -> Result + '_> {
        let dustdata_config = dustdata_config();
        let base_path = dustdata_config.data_path.join(self.name.clone());

        TransactionBuilder::lock(
            base_path,
            self.name.clone(),
            self.btree.try_lock().map_err(|_| Error::Deadlock)?,
        )
    }

    /// Commits a transaction: drops the transaction (releasing any lock),
    /// then replays its logged operations against the main B-tree.
    pub fn commit(&self, mut xact: X) -> Result<()>
    where
        X: Transaction,
    {
        let mut page = xact.log().page()?;

        // unlock mutex lock (a LockTransaction holds the btree guard)
        drop(xact);

        let mut btree = self.btree.lock().unwrap();

        for operation in page.iter() {
            match operation {
                XLogOperation::Insert { key, value } => btree.insert(key, value)?,
                XLogOperation::Delete { key } => btree.delete(&key)?,
                XLogOperation::Update { key, new_value } => {
                    btree.delete(&key)?;
                    btree.insert(key, new_value)?
                }
                XLogOperation::Drop => {}
            }
        }

        Ok(())
    }

    /// Checks if the collection contains a key.
    pub fn contains(&self, key: &str) -> Result {
        self.btree.lock().unwrap().contains(&key.to_string())
    }

    /// Gets a value from the collection.
    pub fn get(&self, key: &str) -> Result> {
        self.btree.lock().unwrap().get(&key.to_string())
    }
}

impl Drop for Collection {
    fn drop(&mut self) {
        let dustdata_config = dustdata_config();
        let base_path = dustdata_config.data_path.join(&self.name);

        // Best-effort removal of the lock file. Never panic inside Drop:
        // a panic here during unwinding aborts the process.
        fs::remove_file(base_path.join(COLLECTION_LOCK_FILE)).ok();
    }
}

--------------------------------------------------------------------------------
/src/collection/strategies/branch.rs:
--------------------------------------------------------------------------------
// Branch strategy: the transaction works on a private copy of the data file
// (Data.xact.{xid}) so it never blocks other readers/writers.
use std::fs;

use crate::{
    btree::{BTree, ValueTrait},
    collection::xact::{Transaction, TransactionOperations},
    dustdata_config,
    error::Error,
};

pub struct BranchTransaction {
    pub xid: u64,
    pub btree: BTree,
    pub xact_op: TransactionOperations,
    pub collection_name: String,
}

impl Transaction for BranchTransaction {
    fn rollback(self) {}

    fn xid(&self) -> u64 {
        self.xid
    }

    fn log(&mut self) -> &mut TransactionOperations {
        &mut self.xact_op
    }

    fn btree(&mut self) -> &mut BTree {
        &mut self.btree
    }
}

impl Drop for BranchTransaction {
    fn drop(&mut self) {
        let dustdata_config = dustdata_config();
        let base_path = dustdata_config.data_path.join(&self.collection_name);

        let xid = self.xid();

        let xact_file = base_path.join(format!("Data.xact.{}", xid));
        let xlog_file = base_path.join(format!("Data.xlog.{}", xid));

        // Best-effort cleanup of the per-transaction files. Previously these
        // were `.unwrap()`ed, which panics in Drop (and aborts the process
        // if Drop runs during unwinding) e.g. when a file is already gone.
        fs::remove_file(xact_file).map_err(Error::IoError).ok();
        fs::remove_file(xlog_file).map_err(Error::IoError).ok();
    }
}

--------------------------------------------------------------------------------
/src/collection/strategies/lock.rs:
--------------------------------------------------------------------------------
// Lock strategy: the transaction holds the collection's B-tree mutex guard
// for its whole lifetime, excluding all other access until dropped.
use std::{fs, sync::MutexGuard};

use crate::{
    btree::{BTree, ValueTrait},
    collection::xact::{Transaction, TransactionOperations},
    dustdata_config,
    error::Error,
};

pub struct LockTransaction<'lock, T: ValueTrait> {
    pub xid: u64,
    // Held only for its locking side effect; released on drop.
    pub _lock: MutexGuard<'lock, BTree>,
    pub btree: BTree,
    pub xact_op: TransactionOperations,
    pub collection_name: String,
}

impl<'lock, T: ValueTrait> Transaction for LockTransaction<'lock, T> {
    fn rollback(self) {}

    fn xid(&self) -> u64 {
        self.xid
    }

    fn log(&mut self) -> &mut TransactionOperations {
        &mut self.xact_op
    }

    fn btree(&mut self) -> &mut BTree {
        &mut self.btree
    }
}

impl<'a, T: ValueTrait> Drop for LockTransaction<'a, T> {
    fn drop(&mut self) {
        let dustdata_config = dustdata_config();
        let base_path = dustdata_config.data_path.join(&self.collection_name);

        let xid = self.xid();

        let xact_file = base_path.join(format!("Data.xact.{}", xid));
        let xlog_file = base_path.join(format!("Data.xlog.{}", xid));

        // Best-effort cleanup; never panic inside Drop (see branch.rs).
        fs::remove_file(xact_file).map_err(Error::IoError).ok();
        fs::remove_file(xlog_file).map_err(Error::IoError).ok();
    }
}

--------------------------------------------------------------------------------
/src/collection/strategies/mod.rs:
--------------------------------------------------------------------------------
pub mod branch;
pub mod lock;

--------------------------------------------------------------------------------
/src/collection/xact.rs:
--------------------------------------------------------------------------------
// Transaction machinery: per-transaction operation log and the builder that
// materializes the branch/lock transaction strategies.
use std::{marker::PhantomData, path::Path, sync::MutexGuard, time};

use crate::{
    btree::{spec::BTreePair, BTree, BTreeIterator, ValueTrait},
    error::{Error, Result},
    page::{io::BlockIO, Page},
};

use super::{
    strategies::{branch::BranchTransaction, lock::LockTransaction},
    xlog::XLogOperation,
};

// Append-only log of a single transaction's operations, stored in page 0 of
// its own block file (Data.xlog.{xid}).
pub struct TransactionOperations {
    io: BlockIO,
    _t: PhantomData,
}

impl TransactionOperations {
    pub fn new(io: BlockIO) -> Self {
        Self {
            io,
            _t: PhantomData,
        }
    }

    /// Appends one operation: read-modify-write of page 0.
    /// NOTE(review): all operations of a transaction must fit in one page —
    /// confirm against Page capacity handling.
    pub fn write(&mut self, operation: XLogOperation) -> Result<()> {
        let mut page: Page> = self.io.read_page(0).map_err(Error::IoError)?;

        page.write(operation)?;

        self.io.write_page(0, &page).map_err(Error::IoError)?;

        Ok(())
    }

    /// Reads back the transaction's full operation page (used by commit).
    pub fn page(&mut self) -> Result>> {
        let page: Page> = self.io.read_page(0).map_err(Error::IoError)?;

        Ok(page)
    }
}

// Common transaction interface. Default methods first record the operation
// in the transaction log, then apply it to the transaction's private B-tree.
pub trait Transaction {
    fn get(&mut self, key: &str) -> Result> {
        self.btree().get(&key.to_string())
    }

    fn insert(&mut self, key: &str, value: T) -> Result<()> {
        // log first, then apply to the private tree
        self.log().write(XLogOperation::Insert {
            key: key.to_string(),
            value: value.clone(),
        })?;

        self.btree().insert(key.to_string(), value)
    }

    fn delete(&mut self, key: &str) -> Result<()> {
        self.log().write(XLogOperation::Delete {
            key: key.to_string(),
        })?;

        self.btree().delete(&key.to_string())
    }

    /// Update is implemented as delete + insert on the private tree.
    fn update(&mut self, key: &str, new_value: T) -> Result<()> {
        self.log().write(XLogOperation::Update {
            key: key.to_string(),
            new_value: new_value.clone(),
        })?;

        self.btree().delete(&key.to_string())?;
        self.btree().insert(key.to_string(), new_value)?;

        Ok(())
    }

    fn iter(&mut self) -> BTreeIterator<'_, String, T> {
        self.btree().iter()
    }

    fn find_by_pattern<'a>(
        &'a mut self,
        pattern: &'a str,
    ) -> Box<(dyn Iterator> + 'a)>
    where
        T: 'a,
    {
        Box::new(self.btree().find_pattern(pattern))
    }

    fn rollback(self);
    fn xid(&self) -> u64;
    fn log(&mut self) -> &mut TransactionOperations;
    fn btree(&mut self) -> &mut BTree;
}

pub struct TransactionBuilder;

impl TransactionBuilder {
    /// Builds a branch transaction: creates Data.xlog.{xid} for the op log
    /// and copies the live data file to Data.xact.{xid} as a private
    /// snapshot the transaction works on. The mutex guard is released when
    /// this function returns (it is not stored).
    pub fn branch(
        base_path: P,
        collection_name: String,
        mut data: MutexGuard<'_, BTree>,
    ) -> Result + '_>
    where
        T: ValueTrait,
        P: AsRef,
    {
        let xid = gen_xid();

        let xlog_block = BlockIO::new(base_path.as_ref().join(format!("Data.xlog.{}", xid)))
            .map_err(Error::IoError)?;
        let xact_op = TransactionOperations::new(xlog_block);

        let xact_block = data
            .io
            .copy_to(base_path.as_ref().join(format!("Data.xact.{}", xid)))
            .map_err(Error::IoError)?;
        let btree = BTree::new(xact_block)?;

        Ok(BranchTransaction {
            collection_name,
            xid,
            btree,
            xact_op,
        })
    }

    /// Builds a lock transaction: same file layout as `branch`, but the
    /// mutex guard is moved into the transaction so the collection stays
    /// exclusively locked until the transaction is dropped.
    pub fn lock(
        base_path: P,
        collection_name: String,
        mut data: MutexGuard<'_, BTree>,
    ) -> Result + '_>
    where
        T: ValueTrait,
        P: AsRef,
    {
        let xid = gen_xid();

        let xlog_block = BlockIO::new(base_path.as_ref().join(format!("Data.xlog.{}", xid)))
            .map_err(Error::IoError)?;
        let xact_op = TransactionOperations::new(xlog_block);

        let xact_block = data
            .io
            .copy_to(base_path.as_ref().join(format!("Data.xact.{}", xid)))
            .map_err(Error::IoError)?;
        let btree = BTree::new(xact_block)?;

        Ok(LockTransaction {
            collection_name,
            xid,
            btree,
            xact_op,
            _lock: data,
        })
    }
}

/// Generates a transaction id from the current Unix time in microseconds.
/// NOTE(review): two transactions started within the same microsecond get
/// the same xid and would share Data.xact/Data.xlog files — confirm whether
/// that is acceptable.
pub fn gen_xid() -> u64 {
    (time::SystemTime::now()
        .duration_since(time::UNIX_EPOCH)
        .unwrap()
        .as_micros())
    .try_into()
    .unwrap()
}

--------------------------------------------------------------------------------
/src/collection/xlog.rs:
--------------------------------------------------------------------------------
// Collection-wide write-ahead log: maps transaction id -> the list of
// operations that transaction performed, stored in its own B-tree file.
use crate::{
    btree::{BTree, BTreeIterator, ValueTrait},
    error::{Error, Result},
    page::io::BlockIO,
};

use serde::{Deserialize, Serialize};
use std::{ops::RangeBounds, path::Path};

pub const XLOG_FILENAME: &str = ".xlog";

// One logged mutation. `Drop` marks a collection drop and carries no data.
#[derive(Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
pub enum XLogOperation {
    Insert { key: String, value: T },
    Update { key: String, new_value: T },
    Delete { key: String },
    Drop,
}

pub struct XLog {
    btree: BTree>>,
}

impl XLog {
    /// Opens (or creates) the `.xlog` B-tree file under `path`.
    pub fn new
(path: P) -> Result
    where
        P: AsRef,
    {
        let log_path = path.as_ref().join(XLOG_FILENAME);
        let io = BlockIO::new(log_path).map_err(Error::IoError)?;

        let btree = BTree::new(io)?;

        Ok(Self { btree })
    }

    /// Records the operations of transaction `xid`.
    pub fn write(&mut self, xid: u64, ops: Vec>) -> Result<()> {
        self.btree.insert(xid, ops)
    }

    /// Reads the operations recorded for transaction `xid`, if any.
    pub fn read(&mut self, xid: u64) -> Result>>> {
        self.btree.get(&xid)
    }

    /// Reads all operation lists whose xid falls in `range`.
    pub fn range(&mut self, range: R) -> Result>>>
    where
        R: RangeBounds,
    {
        self.btree.range(range)
    }

    /// Highest (most recent) transaction id in the log, or None when empty.
    /// NOTE(review): walks the whole iterator to reach the last entry.
    pub fn head_id(&mut self) -> Option {
        self.btree.iter().last().map(|c| c.key)
    }
}

--------------------------------------------------------------------------------
/src/config.rs:
--------------------------------------------------------------------------------
// Process-wide configuration, initialized at most once via OnceLock.
use std::{
    path::{Path, PathBuf},
    sync::OnceLock,
};

static COMPRESSION_CONFIG: OnceLock = OnceLock::new();
static DUSTDATA_CONFIG: OnceLock = OnceLock::new();

// Accessors fall back to the default configuration when `build()` was never
// called.
pub(super) fn compression_config() -> &'static CompressionConfig {
    COMPRESSION_CONFIG.get_or_init(CompressionConfig::default)
}

pub(super) fn dustdata_config() -> &'static DustDataConfig {
    DUSTDATA_CONFIG.get_or_init(DustDataConfig::default)
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub enum OpenOptions {
    /// Open the database in read-only mode.
    ReadOnly,
    /// Open the database in read-write mode.
    ReadWrite,
}

#[derive(Debug, Clone)]
pub struct DustDataConfig {
    pub data_path: PathBuf,
    pub open_options: OpenOptions,
}

impl Default for DustDataConfig {
    fn default() -> Self {
        Self::new()
    }
}

impl DustDataConfig {
    pub fn new() -> Self {
        Self {
            data_path: PathBuf::from("./data"),
            open_options: OpenOptions::ReadWrite,
        }
    }

    /// The path to the data directory.
    /// Default: ./data
    pub fn data_path>(mut self, data_path: P) -> Self {
        self.data_path = data_path.as_ref().to_path_buf();
        self
    }

    /// The open options for the database.
    /// Default: OpenOptions::ReadWrite
    /// This is the mode in which the database is opened.
    pub fn open_options(mut self, open_options: OpenOptions) -> Self {
        self.open_options = open_options;
        self
    }

    /// Publishes this configuration globally. Panics if a configuration was
    /// already set (OnceLock::set fails on the second call).
    pub fn build(self) {
        DUSTDATA_CONFIG.set(self).unwrap();
    }
}

#[derive(Debug, Clone)]
pub struct CompressionConfig {
    /// The compression level.
    ///
    /// The integer here is typically on a scale of 0-9 where 0 means "no
    /// compression" and 9 means "take as long as you'd like".
    pub level: u32,
    /// Enable compression
    pub enabled: bool,
}

impl Default for CompressionConfig {
    fn default() -> Self {
        Self::new()
    }
}

impl CompressionConfig {
    pub fn new() -> Self {
        Self {
            level: 6,
            enabled: false,
        }
    }

    /// The compression level.
91 | /// 92 | /// Default: 6 93 | pub fn level(&mut self, level: u32) -> &mut Self { 94 | self.level = level; 95 | self 96 | } 97 | 98 | /// Enable compression 99 | /// 100 | /// Default: true 101 | pub fn enabled(&mut self, enabled: bool) -> &mut Self { 102 | self.enabled = enabled; 103 | self 104 | } 105 | 106 | pub fn build(self) { 107 | COMPRESSION_CONFIG.set(self).unwrap(); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use glob::PatternError; 4 | 5 | pub enum Error { 6 | IoError(std::io::Error), 7 | SerializeError(bincode::Error), 8 | Deadlock, 9 | DatabaseLocked, 10 | AlreadyExists(String), 11 | NotFound(String), 12 | CorruptedData(CorruptedDataError), 13 | Other(String), 14 | Cannot(String), 15 | PatternError(PatternError), 16 | } 17 | 18 | impl Debug for Error { 19 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 20 | match self { 21 | Error::IoError(err) => write!(f, "IO Error: {}", err), 22 | Error::Deadlock => write!(f, "Deadlock"), 23 | Error::DatabaseLocked => { 24 | write!(f, "Database is locked, maybe another instance is running?") 25 | } 26 | Error::Other(err) => write!(f, "Other Error: {}", err), 27 | Error::CorruptedData(err) => write!(f, "{:?}", err), 28 | Error::AlreadyExists(message) => write!(f, "{} already exists", message), 29 | Error::NotFound(message) => write!(f, "{} not found", message), 30 | Error::Cannot(message) => write!(f, "cannot {}", message), 31 | Error::SerializeError(error) => write!(f, "serialize error {}", error), 32 | Error::PatternError(error) => write!(f, "pattern error {}", error), 33 | } 34 | } 35 | } 36 | 37 | pub struct CorruptedDataError { 38 | pub kind: CorruptedDataKind, 39 | pub message: String, 40 | } 41 | 42 | #[derive(Debug)] 43 | pub enum CorruptedDataKind { 44 | ChecksumNotMatch, 45 | UnsyncWithWAL, 46 | } 

impl Debug for CorruptedDataError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Corrupted data: {}. {:?}", self.message, self.kind)
    }
}

/// Crate-wide result alias over [`Error`].
pub type Result = std::result::Result;

--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
//! # DustData
//!
//! `dustdata` is a data engine written in Rust. It is designed to be fast, reliable and easy to use.
//! It is a key-value store with support for multiple data types.
//!
//! ## Usage
//! Initialize a new `DustData` instance with the default configuration:
//! ```rust
//! use dustdata::DustData;
//!
//! let mut dustdata = DustData::new().unwrap();
//! ```
//!
//! ## Inserting data into a collection
//!
//! ```rust
//! #[derive(Serialize, Deserialize, Clone, Debug)]
//! struct User {
//!     name: String,
//!     age: u32,
//! }
//!
//! let collection = dustdata.collection::("users").unwrap();
//!
//! let user = User {
//!     name: "Pedro".to_string(),
//!     age: 21,
//! };
//!
//! // Creating a new transaction.
//! let mut transaction = collection.branch_start().unwrap();
//!
//! // Inserting the user into the transaction.
//! transaction.insert("user:1", user).unwrap();
//!
//! // Committing the transaction.
//! collection.commit(transaction).unwrap();
//!
//! // Done!
//! ```
//! ## Reading data from a collection
//!
//! ```rust
//! let collection = dustdata.collection::("users").unwrap();
//!
//! let user = collection.get("user:1").unwrap();
//! ```
// (doc fixes above: `DustData::new()` takes no arguments, the method is
// `branch_start` not `start_branch`, and `collection(...)` returns a Result.)

pub mod btree;
pub mod collection;
pub mod config;
pub mod error;
pub mod page;
mod serializer;

pub use collection::Collection;
pub use config::*;

pub use bincode;
use error::Result;
use serde::{de::DeserializeOwned, Serialize};
use std::fmt::Debug;
use std::fs;
use std::sync::Arc;

/// A value that is exactly one of two alternatives.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum Either {
    Left(L),
    Right(R),
}

impl Either {
    /// Borrow of the left value, or None.
    pub fn left(&self) -> Option<&L> {
        match self {
            Self::Left(l) => Some(l),
            _ => None,
        }
    }

    /// Borrow of the right value, or None.
    pub fn right(&self) -> Option<&R> {
        match self {
            Self::Right(r) => Some(r),
            _ => None,
        }
    }

    pub fn is_left(&self) -> bool {
        matches!(self, Self::Left(_))
    }

    pub fn is_right(&self) -> bool {
        matches!(self, Self::Right(_))
    }
}

/// Entry point handle; all state lives in the global config and per-
/// collection files, so this is a zero-sized marker type.
#[derive(Debug, Clone, Default, Copy)]
pub struct DustData;

impl DustData {
    /// Ensures the configured data directory exists.
    pub fn new() -> Result {
        let dustdata_config = dustdata_config();

        fs::create_dir_all(&dustdata_config.data_path).ok();

        Ok(Self)
    }

    /// Creates a new collection.
    /// ## Arguments
    /// * `name` - The name of the collection.
    /// ## Example
    /// ```rust
    /// use dustdata::DustData;
    ///
    /// let dustdata = DustData::new().unwrap();
    ///
    /// let collection = dustdata.collection::("users").unwrap();
    /// ```
    pub fn collection(&self, name: &str) -> Result>>
    where
        T: Sync + Send + Clone + Debug + Serialize + DeserializeOwned + 'static + Ord,
    {
        Ok(Arc::new(collection::Collection::new(name)?))
    }

    /// Drops a collection.
    /// ## Arguments
    /// * `name` - The name of the collection.
129 | /// ## Example 130 | /// ```rust 131 | /// use dustdata::DustData; 132 | /// 133 | /// let dustdata = DustData::new().unwrap(); 134 | /// 135 | /// dustdata.drop_collection("users").unwrap(); 136 | /// ``` 137 | pub fn drop_collection(&self, name: &str) -> Result<()> { 138 | let dustdata_config = dustdata_config(); 139 | 140 | fs::remove_dir_all(dustdata_config.data_path.join(name)) 141 | .map_err(|_| error::Error::NotFound("collection".to_owned()))?; 142 | 143 | Ok(()) 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /src/page.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | cmp::Ordering, 3 | io::{Cursor, Read, Seek, SeekFrom, Write}, 4 | marker::PhantomData, 5 | }; 6 | 7 | use crate::{ 8 | error::{CorruptedDataError, CorruptedDataKind, Error, Result}, 9 | serializer::{deserialize, serialize}, 10 | Either, 11 | }; 12 | use crc32fast::Hasher; 13 | use layout::{CellPointerFlags, CellPointerMetadata, PageHeader}; 14 | use serde::{de::DeserializeOwned, Serialize}; 15 | use spec::{LocationOffset, CELL_POINTER_SIZE, PAGE_FREE_SPACE_BYTE, PAGE_HEADER_SIZE, PAGE_SIZE}; 16 | 17 | pub mod io; 18 | pub mod layout; 19 | pub mod spec; 20 | 21 | /// Slotted page layout 22 | /// ```text 23 | /// CELL_POINTER_SIZE 24 | /// | +-> header.lower 25 | /// PAGE_HEADER_SIZE | | 26 | /// |--------|------------------| V 27 | /// +--------+-------------------+-----------------+-----------------+ -+ 28 | /// | header | cell pointer 01 | cell pointer 02 | ---> | | 29 | /// +--------+-------------------+-----------------+ | | 30 | /// | (Free space) | +- PAGE_SIZE (4096 bytes) 31 | /// | +-----------------+-----------------+-----------------+ | 32 | /// | <--- | cell data 02 | cell data 01 | special space | | 33 | /// +----------+-----------------+-----------------+-----------------+ -+ 34 | /// ^ 35 | /// | 36 | /// +-> header.upper 37 | /// ``` 38 | /// Header: contains 
/// metadata about the page, such as the type of the page, the lower and upper pointers.
///
/// Additional header: additional metadata about the page.
///
/// Cell pointer: contains the position of the cell in the page, the length of the cell, and the metadata of the cell.
/// Cell data: the actual data of the cell.
///
/// The lower pointer points to the end of the cell pointers.
/// The upper pointer points to the end of the cell data.
///
pub struct Page {
    pub header: PageHeader,
    // Entire page image held in memory; all reads/writes go through this.
    io: Cursor<[u8; PAGE_SIZE as usize]>,

    _t: PhantomData,
}

impl Page {
    /// Creates an empty page, reserving `special_size` bytes at the end for
    /// the special area, and writes the initial header (incl. checksum).
    pub fn create(special_size: u16) -> Result {
        let mut io = Cursor::new([PAGE_FREE_SPACE_BYTE; PAGE_SIZE as usize]);

        let lower = PAGE_HEADER_SIZE as LocationOffset;
        let special = PAGE_SIZE - special_size as LocationOffset;
        let upper = special;

        let header = PageHeader {
            upper,
            lower,
            special,
            checksum: 0,
        };

        let header_bytes = bincode::serialize(&header).map_err(Error::SerializeError)?;
        io.seek(SeekFrom::Start(0)).map_err(Error::IoError)?;
        io.write(&header_bytes).map_err(Error::IoError)?;

        let mut page: Page = Page {
            header,
            io,
            _t: PhantomData,
        };

        // update checksum
        page.write_header()?;

        Ok(page)
    }

    /// Opens a page from its raw bytes and verifies the stored checksum,
    /// returning `CorruptedData(ChecksumNotMatch)` on mismatch.
    pub fn open(data: [u8; PAGE_SIZE as usize]) -> Result {
        let mut io = Cursor::new(data);
        let header = Self::read_header(&mut io)?;

        let page = Self {
            io,
            header: header.clone(),
            _t: PhantomData,
        };

        let checksum = page.checksum();

        if header.checksum != checksum {
            return Err(Error::CorruptedData(CorruptedDataError {
                kind: CorruptedDataKind::ChecksumNotMatch,
                message: "checksum does not match".to_string(),
            }));
        }

        Ok(page)
    }

    /// Appends a cell: data grows downward from `upper`, its pointer is
    /// appended at `lower`. Returns (cell address, cell-pointer address).
    ///
    /// NOTE(review): uses `Write::write` (may short-write) rather than
    /// `write_all`, and performs no free-space check before computing
    /// `upper - data.len()` — confirm callers guarantee capacity.
    pub fn write(&mut self, data: T) -> Result<(LocationOffset, LocationOffset)> {
        let data = serialize(&data)?;

        // cell_addr is the position of the cell in the page
        let cell_addr: LocationOffset = self.header.upper - data.len() as LocationOffset;

        let cell_pointer_addr = self.header.lower;

        // cell_addr to little endian bytes
        let cell_addr_binary = cell_addr.to_le_bytes();
        let cell_len_binary = (data.len() as u16).to_le_bytes();

        // serialize cell_pointer: [addr:2][len:2][metadata]
        let mut cell_pointer: Vec = vec![0; CELL_POINTER_SIZE as usize];
        cell_pointer[0..2].copy_from_slice(&cell_addr_binary);
        cell_pointer[2..4].copy_from_slice(&cell_len_binary);
        cell_pointer[4..].copy_from_slice(&CellPointerMetadata::default().to_vec());

        // write to io
        self.io
            .seek(SeekFrom::Start(cell_addr as u64))
            .map_err(Error::IoError)?;
        self.io.write(&data).map_err(Error::IoError)?;

        self.io
            .seek(SeekFrom::Start(cell_pointer_addr as u64))
            .map_err(Error::IoError)?;
        self.io.write(&cell_pointer).map_err(Error::IoError)?;

        // update header
        self.header.upper = cell_addr;
        self.header.lower += CELL_POINTER_SIZE as LocationOffset;
        self.write_header()?;

        // sync file
        // self.io.sync().map_err(Error::IoError)?;

        Ok((cell_addr, cell_pointer_addr as LocationOffset))
    }

    /// Appends each item in order via [`Self::write`].
    pub fn write_all(&mut self, data: Vec) -> Result<()> {
        for i in data {
            self.write(i)?;
        }

        Ok(())
    }

    /// Inserts a cell at slot `index`, shifting later cell pointers right.
    /// Returns (cell address, cell-pointer offset).
    pub fn insert(
        &mut self,
        index: LocationOffset,
        data: T,
    ) -> Result<(LocationOffset, LocationOffset)> {
        let data = serialize(&data)?;

        let offset = self.index_to_offset(index);

        let cell_addr: LocationOffset = self.header.upper - data.len() as LocationOffset;

        let cell_addr_binary = cell_addr.to_le_bytes();
        let cell_len_binary = (data.len() as u16).to_le_bytes();

        let mut cell_pointer: Vec = vec![0; CELL_POINTER_SIZE as usize];
        cell_pointer[0..2].copy_from_slice(&cell_addr_binary);
        cell_pointer[2..4].copy_from_slice(&cell_len_binary);
        cell_pointer[4..].copy_from_slice(&CellPointerMetadata::default().to_vec());

        // shift the cells to the right
        // NOTE(review): the else-branch (`offset` itself as the shift
        // length) looks suspicious — for an in-range offset the shifted
        // length should be `lower - offset`; verify the boundary case.
        let cells_pointers_to_shift_to_right_len =
            if offset < self.header.lower as usize - CELL_POINTER_SIZE as usize {
                self.header.lower as usize - offset
            } else {
                offset
            };

        let mut buffer = vec![0; cells_pointers_to_shift_to_right_len];
        self.io
            .seek(SeekFrom::Start(offset as u64))
            .map_err(Error::IoError)?;
        self.io.read(&mut buffer).map_err(Error::IoError)?;

        // write cells pointers to the right
        self.io
            .seek(SeekFrom::Start(offset as u64 + CELL_POINTER_SIZE as u64))
            .map_err(Error::IoError)?;
        self.io.write(&buffer).map_err(Error::IoError)?;

        let cell_pointer_offset = offset;

        // write the cell data at the cell_addr
        self.io
            .seek(SeekFrom::Start(cell_addr as u64))
            .map_err(Error::IoError)?;
        self.io.write(&data).map_err(Error::IoError)?;

        // write the cell pointer at the cell_pointer_offset
        self.io
            .seek(SeekFrom::Start(cell_pointer_offset as u64))
            .map_err(Error::IoError)?;
        self.io.write(&cell_pointer).map_err(Error::IoError)?;

        // update header
        self.header.upper = cell_addr as LocationOffset;
        self.header.lower += CELL_POINTER_SIZE as LocationOffset;

        self.write_header()?;

        Ok((cell_addr, cell_pointer_offset as LocationOffset))
    }

    /// Overwrites the cell at slot `index` in place, returning the old
    /// value. The new encoding must not exceed the old cell's length
    /// (overflow handling is unimplemented and panics).
    pub fn replace(&mut self, index: LocationOffset, data: T) -> Result {
        let data = serialize(&data)?;

        let offset = self.index_to_offset(index);

        let old_cell = self.read_at(offset)?.unwrap();
        let (cell_addr, cell_len, _) = self.read_cell_pointer(offset)?;

        if data.len() > cell_len.into() {
            // TODO: overflow
            unimplemented!("overflow on replace")
        }

        let cell_addr_binary = cell_addr.to_le_bytes();
        let cell_len_binary = (data.len() as u16).to_le_bytes();

        let mut cell_pointer: Vec = vec![0; CELL_POINTER_SIZE as usize];
        cell_pointer[0..2].copy_from_slice(&cell_addr_binary);
        cell_pointer[2..4].copy_from_slice(&cell_len_binary);
        cell_pointer[4..].copy_from_slice(&CellPointerMetadata::default().to_vec());

        // write the cell data at the cell_addr
        self.io
            .seek(SeekFrom::Start(cell_addr as u64))
            .map_err(Error::IoError)?;
        self.io.write(&data).map_err(Error::IoError)?;

        // write the cell pointer at the offset
        self.io
            .seek(SeekFrom::Start(offset as u64))
            .map_err(Error::IoError)?;
        self.io.write(&cell_pointer).map_err(Error::IoError)?;

        // update header
        // NOTE(review): unconditionally setting `upper = cell_addr` can
        // RAISE `upper` when the replaced cell is not the lowest one,
        // apparently shrinking the data region — verify this is intended.
        self.header.upper = cell_addr as LocationOffset;

        self.write_header()?;

        Ok(old_cell)
    }

    /// Reads the cell at slot `index` (None when the slot is out of range
    /// or the cell is marked deleted).
    pub fn read(&mut self, index: LocationOffset) -> Result> {
        let offset = self.index_to_offset(index);

        self.read_at(offset)
    }

    /// Reads the cell whose pointer lives at byte `offset`.
    pub fn read_at(&mut self, offset: usize) -> Result> {
        if offset >= self.header.lower as usize {
            return Ok(None);
        }

        // preallocate a buffer to read the page
        let mut buffer = [0; PAGE_SIZE as usize];

        // read the page into the buffer
        self.io.seek(SeekFrom::Start(0)).map_err(Error::IoError)?;
        self.io.read(&mut buffer).map_err(Error::IoError)?;

        // create a cursor to read the buffer
        let mut buffer = Cursor::new(buffer);

        let (cell_addr, cell_len, cell_metadata) = self.read_cell_pointer(offset)?;

        // deleted cells are skipped
        if cell_metadata.flags == CellPointerFlags::Deleted as u8 {
            return Ok(None);
        }

        let mut data = vec![0; cell_len as usize];
buffer 288 | .seek(SeekFrom::Start(cell_addr as u64)) 289 | .map_err(Error::IoError)?; 290 | buffer.read_exact(&mut data).unwrap(); 291 | 292 | let data = deserialize(&data)?; 293 | 294 | Ok(Some(data)) 295 | } 296 | 297 | fn read_cell_pointer( 298 | &mut self, 299 | offset: usize, 300 | ) -> Result<(LocationOffset, LocationOffset, CellPointerMetadata)> { 301 | let mut cell_pointer = [0; CELL_POINTER_SIZE as usize]; 302 | 303 | self.io 304 | .seek(SeekFrom::Start(offset as u64)) 305 | .map_err(Error::IoError)?; 306 | self.io 307 | .read_exact(&mut cell_pointer) 308 | .map_err(Error::IoError)?; 309 | 310 | let cell_addr = LocationOffset::from_le_bytes(cell_pointer[0..2].try_into().unwrap()); 311 | let cell_len = LocationOffset::from_le_bytes(cell_pointer[2..4].try_into().unwrap()); 312 | let cell_metadata = CellPointerMetadata::from_slice(&cell_pointer[4..]); 313 | 314 | Ok((cell_addr, cell_len, cell_metadata)) 315 | } 316 | 317 | pub fn binary_search_by(&mut self, mut f: F) -> Either 318 | where 319 | F: FnMut(&T) -> Ordering, 320 | { 321 | let mut size = self.len(); 322 | let mut left = 0; 323 | let mut right = size; 324 | 325 | while left < right { 326 | let mid: LocationOffset = left + size / 2; 327 | 328 | let data = self.read(mid).unwrap().unwrap(); 329 | 330 | match f(&data) { 331 | Ordering::Less => left = mid + 1, 332 | Ordering::Greater => right = mid, 333 | Ordering::Equal => return Either::Left(mid), 334 | } 335 | 336 | size = right - left; 337 | } 338 | 339 | Either::Right(left) 340 | } 341 | 342 | pub fn binary_search(&mut self, x: &T) -> Either { 343 | self.binary_search_by(|a| a.cmp(x)) 344 | } 345 | 346 | pub fn binary_search_by_key(&mut self, b: &B, mut f: F) -> Either 347 | where 348 | F: FnMut(&T) -> B, 349 | B: Ord, 350 | { 351 | self.binary_search_by(|k| f(k).cmp(b)) 352 | } 353 | 354 | pub fn linear_search_by(&mut self, mut f: F) -> Either 355 | where 356 | F: FnMut(&T) -> Ordering, 357 | { 358 | let size = self.len(); 359 | let mut pointer = 
0; 360 | 361 | while pointer < size { 362 | let data = self.read(pointer).unwrap().unwrap(); 363 | 364 | if f(&data).is_eq() { 365 | return Either::Left(pointer); 366 | } 367 | 368 | pointer += 1; 369 | } 370 | 371 | Either::Right(pointer) 372 | } 373 | 374 | pub fn linear_search(&mut self, x: &T) -> Either { 375 | self.linear_search_by(|a| a.cmp(x)) 376 | } 377 | 378 | pub fn linear_search_by_key(&mut self, b: &B, mut f: F) -> Either 379 | where 380 | F: FnMut(&T) -> B, 381 | B: Ord, 382 | { 383 | self.linear_search_by(|k| f(k).cmp(b)) 384 | } 385 | 386 | pub fn delete(&mut self, index: LocationOffset) -> Result<()> { 387 | let offset = self.index_to_offset(index); 388 | 389 | self.delete_at(offset) 390 | } 391 | 392 | pub fn delete_at(&mut self, offset: usize) -> Result<()> { 393 | let cell_pointer_metadata_offset = offset + 4; 394 | 395 | self.io 396 | .seek(SeekFrom::Start(cell_pointer_metadata_offset as u64)) 397 | .map_err(Error::IoError)?; 398 | self.io 399 | .write(&[CellPointerFlags::Deleted as u8]) 400 | .map_err(Error::IoError)?; 401 | 402 | self.write_header()?; 403 | 404 | Ok(()) 405 | } 406 | 407 | pub fn delete_range(&mut self, range: R) -> Result<()> 408 | where 409 | R: Iterator, 410 | { 411 | for i in range { 412 | let offset = self.index_to_offset(i); 413 | 414 | let cell_pointer_metadata_offset = offset + 4; 415 | 416 | self.io 417 | .seek(SeekFrom::Start(cell_pointer_metadata_offset as u64)) 418 | .map_err(Error::IoError)?; 419 | self.io 420 | .write(&[CellPointerFlags::Deleted as u8]) 421 | .map_err(Error::IoError)?; 422 | } 423 | 424 | self.write_header()?; 425 | 426 | Ok(()) 427 | } 428 | 429 | pub fn compact(mut self) -> Result { 430 | let data = self.values()?; 431 | let mut page = Self::create(self.special_size())?; 432 | page.write_special(&self.read_special()?)?; 433 | 434 | for data in data { 435 | page.write(data)?; 436 | } 437 | 438 | Ok(page) 439 | } 440 | 441 | pub fn values(&mut self) -> Result> { 442 | let mut data = Vec::new(); 
443 | 444 | for i in 0..self.len() { 445 | if let Some(cell) = self.read(i)? { 446 | data.push(cell); 447 | } 448 | } 449 | 450 | Ok(data) 451 | } 452 | 453 | pub fn split_at(&mut self, index: LocationOffset) -> Result<(Vec, Vec)> { 454 | let values = self.values()?; 455 | let split = values.split_at(index as usize); 456 | 457 | Ok((split.0.to_vec(), split.1.to_vec())) 458 | } 459 | 460 | pub fn split_off(&mut self, index: LocationOffset) -> Result> { 461 | let mut values = self.values()?; 462 | let values = values.split_off(index.into()); 463 | 464 | self.delete_range(index..self.len())?; 465 | 466 | Ok(values) 467 | } 468 | 469 | pub fn write_special(&mut self, data: &[u8]) -> Result<()> { 470 | assert!(data.len() as LocationOffset <= self.special_size()); 471 | 472 | let special = self.header.special as usize; 473 | 474 | self.io 475 | .seek(SeekFrom::Start(special as u64)) 476 | .map_err(Error::IoError)?; 477 | self.io.write(data).map_err(Error::IoError)?; 478 | 479 | self.write_header()?; 480 | 481 | Ok(()) 482 | } 483 | 484 | pub fn read_special(&mut self) -> Result> { 485 | let mut buffer = vec![0; self.special_size() as usize]; 486 | 487 | self.io 488 | .seek(SeekFrom::Start(self.header.special as u64)) 489 | .map_err(Error::IoError)?; 490 | self.io.read(&mut buffer).map_err(Error::IoError)?; 491 | 492 | Ok(buffer) 493 | } 494 | 495 | pub fn remaining_space(&self) -> u16 { 496 | self.header.upper - self.header.lower 497 | } 498 | 499 | pub fn is_empty(&self) -> bool { 500 | self.len() == 0 501 | } 502 | 503 | pub fn len(&self) -> u16 { 504 | (self.header.lower - self.header_size() as LocationOffset) 505 | / CELL_POINTER_SIZE as LocationOffset 506 | } 507 | 508 | pub fn index_to_offset(&self, index: LocationOffset) -> usize { 509 | self.header_size() + (index as usize * CELL_POINTER_SIZE as usize) 510 | } 511 | 512 | pub fn to_bytes(&self) -> std::io::Result<[u8; PAGE_SIZE as usize]> { 513 | let buffer = self.io.clone().into_inner(); 514 | 515 | Ok(buffer) 
516 | } 517 | 518 | pub fn iter(&mut self) -> PageIterator<'_, T> { 519 | PageIterator::new(self) 520 | } 521 | 522 | fn header_size(&self) -> usize { 523 | PAGE_HEADER_SIZE 524 | } 525 | 526 | pub fn special_size(&self) -> u16 { 527 | PAGE_SIZE - self.header.special 528 | } 529 | 530 | fn write_header(&mut self) -> Result<()> { 531 | let checksum = self.checksum(); 532 | self.header.checksum = checksum; 533 | 534 | let buffer = bincode::serialize(&self.header).map_err(Error::SerializeError)?; 535 | 536 | self.io.seek(SeekFrom::Start(0)).map_err(Error::IoError)?; 537 | self.io.write(&buffer).map_err(Error::IoError)?; 538 | 539 | Ok(()) 540 | } 541 | 542 | fn read_header(io: &mut Cursor<[u8; PAGE_SIZE as usize]>) -> Result { 543 | let mut buffer = vec![0; PAGE_HEADER_SIZE]; 544 | 545 | io.seek(SeekFrom::Start(0)).map_err(Error::IoError)?; 546 | io.read(&mut buffer).map_err(Error::IoError)?; 547 | 548 | bincode::deserialize(&buffer).map_err(Error::SerializeError) 549 | } 550 | 551 | fn checksum(&self) -> u32 { 552 | let mut hasher = Hasher::new(); 553 | hasher.update(&self.io.get_ref()[PAGE_HEADER_SIZE..]); 554 | hasher.finalize() 555 | } 556 | } 557 | 558 | pub struct PageIterator<'p, T> { 559 | pos: LocationOffset, 560 | page: &'p mut Page, 561 | } 562 | 563 | impl<'p, T> PageIterator<'p, T> { 564 | pub fn new(page: &'p mut Page) -> Self { 565 | Self { page, pos: 0 } 566 | } 567 | } 568 | 569 | impl<'p, T: Serialize + DeserializeOwned + PartialOrd + Ord + Clone> Iterator 570 | for PageIterator<'p, T> 571 | { 572 | type Item = T; 573 | 574 | fn next(&mut self) -> Option { 575 | if self.pos >= self.page.len() { 576 | return None; 577 | } 578 | 579 | let cell = self.page.read(self.pos).unwrap(); 580 | 581 | self.pos += 1; 582 | 583 | cell 584 | } 585 | 586 | fn last(self) -> Option 587 | where 588 | Self: Sized, 589 | { 590 | self.page.read(self.page.len() - 1).unwrap() 591 | } 592 | } 593 | 594 | #[cfg(test)] 595 | mod page_tests { 596 | use super::*; 597 | 598 | 
#[test] 599 | fn create_page() { 600 | let mut page = Page::::create(0).unwrap(); 601 | 602 | page.write(32).unwrap(); 603 | page.write(54).unwrap(); 604 | 605 | let num1 = page.read(0).unwrap().unwrap(); 606 | let num2 = page.read(1).unwrap().unwrap(); 607 | 608 | assert_eq!(num1, 32); 609 | assert_eq!(num2, 54); 610 | } 611 | 612 | #[test] 613 | fn delete_values_in_page() { 614 | let mut page = Page::::create(0).unwrap(); 615 | 616 | page.write(true).unwrap(); 617 | page.write(false).unwrap(); 618 | page.write(true).unwrap(); 619 | 620 | let value = page.read(1).unwrap().unwrap(); 621 | 622 | assert!(!value); 623 | 624 | page.delete(1).unwrap(); 625 | 626 | let value = page.read(1).unwrap(); 627 | let len = page.len(); 628 | 629 | assert_eq!(value, None); 630 | assert_eq!(len, 3); 631 | 632 | let page = page.compact().unwrap(); 633 | let len = page.len(); 634 | 635 | assert_eq!(len, 2) 636 | } 637 | 638 | #[test] 639 | fn insert_value_page() { 640 | let mut page = Page::::create(0).unwrap(); 641 | 642 | page.write("first value".to_string()).unwrap(); 643 | page.write("second value".to_string()).unwrap(); 644 | page.write("third value".to_string()).unwrap(); 645 | 646 | let value = page.read(1).unwrap().unwrap(); 647 | 648 | assert_eq!(value, "second value".to_string()); 649 | 650 | page.insert(1, "inserted value".to_string()).unwrap(); 651 | 652 | let value = page.read(1).unwrap().unwrap(); 653 | 654 | assert_eq!(value, "inserted value".to_string()); 655 | 656 | let value = page.read(2).unwrap().unwrap(); 657 | 658 | assert_eq!(value, "second value".to_string()); 659 | 660 | let len = page.len(); 661 | 662 | assert_eq!(len, 4) 663 | } 664 | 665 | #[test] 666 | fn binary_search_page_test() { 667 | let mut page = Page::<(u32, String)>::create(0).unwrap(); 668 | 669 | page.write((1, "Pedro".to_string())).unwrap(); 670 | page.write((2, "John".to_string())).unwrap(); 671 | page.write((5, "Ana".to_string())).unwrap(); 672 | page.write((8, "Jane".to_string())).unwrap(); 
673 | page.write((10, "Beatriz".to_string())).unwrap(); 674 | 675 | let found = page.binary_search_by_key(&8, |e| e.0); 676 | 677 | assert_eq!(found, Either::Left(3)); 678 | 679 | let found_value = page.read(*found.left().unwrap()).unwrap().unwrap(); 680 | 681 | assert_eq!(found_value, (8, "Jane".to_string())); 682 | 683 | let found = page.binary_search_by_key(&9, |e| e.0); 684 | 685 | assert_eq!(found, Either::Right(4)); 686 | } 687 | 688 | #[test] 689 | fn special_size_page_test() { 690 | let mut page = Page::::create(4).unwrap(); 691 | 692 | page.write(32).unwrap(); 693 | page.write(16).unwrap(); 694 | 695 | page.write_special(&[20, 10, 5, 2]).unwrap(); 696 | 697 | let value = page.read(0).unwrap().unwrap(); 698 | 699 | assert_eq!(value, 32); 700 | 701 | let special = page.read_special().unwrap(); 702 | 703 | assert_eq!(special, vec![20, 10, 5, 2]) 704 | } 705 | 706 | #[test] 707 | fn replace_page_test() { 708 | let mut page = Page::::create(0).unwrap(); 709 | 710 | page.write(42).unwrap(); 711 | page.write(15).unwrap(); 712 | 713 | let value = page.read(0).unwrap().unwrap(); 714 | 715 | assert_eq!(value, 42); 716 | 717 | page.replace(0, 90).unwrap(); 718 | 719 | let value = page.read(0).unwrap().unwrap(); 720 | 721 | assert_eq!(value, 90); 722 | } 723 | 724 | #[test] 725 | fn page_checksum() { 726 | let mut page = Page::::create(0).unwrap(); 727 | 728 | page.write(99).unwrap(); 729 | 730 | let mut page_bytes = page.to_bytes().unwrap(); 731 | //change a random byte 732 | page_bytes[26] = 2u8; 733 | 734 | Page::::open(page_bytes).err().unwrap(); 735 | } 736 | } 737 | -------------------------------------------------------------------------------- /src/page/io.rs: -------------------------------------------------------------------------------- 1 | use fs2::FileExt; 2 | use serde::{de::DeserializeOwned, Deserialize, Serialize}; 3 | use std::{ 4 | fs::{self, File, OpenOptions}, 5 | io::{self, Read, Seek, Write}, 6 | mem, 7 | path::Path, 8 | }; 9 | 10 | use super::{ 
11 | spec::{PageNumber, PAGE_SIZE}, 12 | Page, 13 | }; 14 | 15 | #[derive(Serialize, Deserialize, Debug, Clone)] 16 | pub struct BlockMetadata { 17 | pub last_page_overflow: Option, 18 | } 19 | 20 | pub const BLOCK_METADATA_SIZE: usize = mem::size_of::(); 21 | 22 | pub struct BlockIO { 23 | file: File, 24 | } 25 | 26 | impl BlockIO { 27 | pub fn new

(path: P) -> io::Result 28 | where 29 | P: AsRef, 30 | { 31 | if let Some(parent) = path.as_ref().parent() { 32 | fs::create_dir_all(parent)?; 33 | } 34 | 35 | let file = open_file(path.as_ref())?; 36 | 37 | Self::from_file(file) 38 | } 39 | 40 | fn from_file(file: File) -> io::Result { 41 | let mut block = Self { file }; 42 | 43 | if block.file.metadata()?.len() == 0 { 44 | let metadata_page = Page::<()>::create(BLOCK_METADATA_SIZE as u16).unwrap(); 45 | 46 | block.write_page(0, &metadata_page)?; 47 | } 48 | 49 | Ok(block) 50 | } 51 | 52 | pub fn copy_to

(&mut self, path: P) -> io::Result 53 | where 54 | P: AsRef, 55 | { 56 | if let Some(parent) = path.as_ref().parent() { 57 | fs::create_dir_all(parent)?; 58 | } 59 | 60 | let mut new_file = open_file(path.as_ref())?; 61 | 62 | let mut buffer = [0u8; 8192]; 63 | 64 | self.file.seek(io::SeekFrom::Start(0))?; 65 | 66 | while let Ok(n) = self.file.read(&mut buffer) { 67 | if n == 0 { 68 | break; 69 | } 70 | 71 | new_file.write_all(&buffer[..n])?; 72 | } 73 | 74 | Self::from_file(new_file) 75 | } 76 | 77 | pub fn write_new_page(&mut self, page: &Page) -> io::Result 78 | where 79 | T: Serialize + DeserializeOwned + PartialOrd + Ord + Clone, 80 | { 81 | let new_index = self.len()?; 82 | 83 | self.write_page(new_index.into(), page)?; 84 | 85 | Ok(new_index) 86 | } 87 | 88 | pub fn write_page(&mut self, page_index: u64, page: &Page) -> io::Result<()> 89 | where 90 | T: Serialize + DeserializeOwned + PartialOrd + Ord + Clone, 91 | { 92 | self.file 93 | .seek(io::SeekFrom::Start(page_index * PAGE_SIZE as u64))?; 94 | 95 | self.file.write_all(&page.to_bytes()?) 
96 | } 97 | 98 | pub fn read_page(&mut self, page_index: u64) -> io::Result> 99 | where 100 | T: Serialize + DeserializeOwned + PartialOrd + Ord + Clone, 101 | { 102 | let mut buffer = [0; PAGE_SIZE as usize]; 103 | 104 | self.file 105 | .seek(io::SeekFrom::Start(page_index * PAGE_SIZE as u64))?; 106 | 107 | self.file.read_exact(&mut buffer)?; 108 | 109 | Ok(Page::open(buffer).unwrap()) 110 | } 111 | 112 | pub fn read_metadata_page(&mut self) -> io::Result> 113 | where 114 | T: Serialize + DeserializeOwned + PartialOrd + Ord + Clone, 115 | { 116 | let mut buffer = [0; PAGE_SIZE as usize]; 117 | 118 | self.file.seek(io::SeekFrom::Start(0))?; 119 | 120 | self.file.read_exact(&mut buffer)?; 121 | 122 | Ok(Page::open(buffer).unwrap()) 123 | } 124 | 125 | pub fn write_metadata_page(&mut self, page: &Page) -> io::Result<()> 126 | where 127 | T: Serialize + DeserializeOwned + PartialOrd + Ord + Clone, 128 | { 129 | self.file.seek(io::SeekFrom::Start(0))?; 130 | 131 | self.file.write_all(&page.to_bytes()?) 132 | } 133 | 134 | pub fn len(&self) -> io::Result { 135 | let file_size = self.file_size()? as u32; 136 | 137 | Ok(file_size / PAGE_SIZE as u32) 138 | } 139 | 140 | pub fn is_empty(&self) -> io::Result { 141 | Ok(self.len()? == 0) 142 | } 143 | 144 | pub fn exists(&self) -> io::Result { 145 | let metadata = self.file.metadata()?; 146 | 147 | Ok(metadata.is_file() && metadata.len() != 0) 148 | } 149 | 150 | pub fn page_exists(&self, page_index: PageNumber) -> io::Result { 151 | let file_size = self.file_size()? 
as u32; 152 | 153 | Ok(file_size / PAGE_SIZE as u32 > page_index) 154 | } 155 | 156 | fn file_size(&self) -> io::Result { 157 | let metadata = self.file.metadata()?; 158 | 159 | Ok(metadata.len()) 160 | } 161 | 162 | pub fn sync(&self) -> io::Result<()> { 163 | self.file.sync_data() 164 | } 165 | } 166 | 167 | pub fn open_file(path: &Path) -> io::Result { 168 | let file = OpenOptions::new() 169 | .create(true) 170 | .read(true) 171 | .write(true) 172 | .truncate(false) 173 | .open(path)?; 174 | 175 | file.lock_exclusive()?; 176 | 177 | Ok(file) 178 | } 179 | 180 | impl Drop for BlockIO { 181 | fn drop(&mut self) { 182 | self.file.unlock().unwrap(); 183 | } 184 | } 185 | -------------------------------------------------------------------------------- /src/page/layout.rs: -------------------------------------------------------------------------------- 1 | use super::spec::LocationOffset; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | #[derive(Serialize, Deserialize, Debug, Clone)] 5 | pub struct PageHeader { 6 | pub checksum: u32, 7 | pub lower: LocationOffset, 8 | pub upper: LocationOffset, 9 | pub special: LocationOffset, 10 | } 11 | 12 | #[derive(Serialize, Deserialize, Debug, Clone, Default)] 13 | pub struct CellPointerMetadata { 14 | pub flags: u8, 15 | pub has_overflow: bool, 16 | } 17 | 18 | impl CellPointerMetadata { 19 | pub fn to_vec(&self) -> Vec { 20 | bincode::serialize(self).unwrap() 21 | } 22 | 23 | pub fn from_slice(bytes: &[u8]) -> Self { 24 | bincode::deserialize(bytes).unwrap() 25 | } 26 | } 27 | 28 | #[derive(Default, PartialEq, Eq, Debug, Clone, Serialize, Deserialize)] 29 | pub enum CellPointerFlags { 30 | #[default] 31 | None = 0b0000_0000, 32 | Deleted = 0b0000_0001, 33 | Overflow = 0b0000_0010, 34 | } 35 | 36 | impl From for CellPointerFlags { 37 | fn from(byte: u8) -> Self { 38 | match byte { 39 | 0b0000_0000 => CellPointerFlags::None, 40 | 0b0000_0001 => CellPointerFlags::Deleted, 41 | 0b0000_0010 => CellPointerFlags::Overflow, 42 | _ => 
panic!("Invalid cell pointer metadata: {:#02x}", byte), 43 | } 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/page/spec.rs: -------------------------------------------------------------------------------- 1 | use super::layout::{CellPointerMetadata, PageHeader}; 2 | 3 | pub const PAGE_FREE_SPACE_BYTE: u8 = 0x00; 4 | pub const CELL_POINTER_SIZE: u16 = 4 + std::mem::size_of::() as u16; 5 | pub const PAGE_SIZE: u16 = 0x1000; 6 | pub const PAGE_HEADER_SIZE: usize = std::mem::size_of::(); 7 | 8 | pub type PageNumber = u32; 9 | pub type LocationOffset = u16; 10 | -------------------------------------------------------------------------------- /src/serializer.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | config::compression_config, 3 | error::{Error, Result}, 4 | }; 5 | use flate2::{read::GzDecoder, write::GzEncoder, Compression}; 6 | use serde::{de::DeserializeOwned, Serialize}; 7 | use std::io::{Read, Write}; 8 | 9 | pub fn serialize(data: &T) -> Result> 10 | where 11 | T: Serialize, 12 | { 13 | let mut bytes = bincode::serialize(data).map_err(Error::SerializeError)?; 14 | 15 | let compression_config = compression_config(); 16 | 17 | if compression_config.enabled { 18 | let mut encoder = GzEncoder::new(Vec::new(), Compression::new(compression_config.level)); 19 | encoder.write_all(&bytes).map_err(Error::IoError)?; 20 | 21 | bytes = encoder.finish().map_err(Error::IoError)?; 22 | } 23 | 24 | Ok(bytes) 25 | } 26 | 27 | pub fn deserialize(bytes: &[u8]) -> Result 28 | where 29 | T: DeserializeOwned, 30 | { 31 | let compression_config = compression_config(); 32 | let mut decoder = GzDecoder::new(bytes); 33 | 34 | if compression_config.enabled && decoder.header().is_some() { 35 | let mut buffer = Vec::new(); 36 | decoder.read_to_end(&mut buffer).unwrap(); 37 | 38 | bincode::deserialize(&buffer).map_err(Error::SerializeError) 39 | } else { 40 | 
bincode::deserialize(bytes).map_err(Error::SerializeError) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /tests/collection_tests.rs: -------------------------------------------------------------------------------- 1 | use std::thread; 2 | 3 | use dustdata::{collection::Transaction, DustData, DustDataConfig}; 4 | 5 | #[test] 6 | pub fn collection_insert_operation() { 7 | DustDataConfig::new().data_path("test_data").build(); 8 | 9 | let dustdata = DustData::new().unwrap(); 10 | 11 | let collection = dustdata.collection::("test_collection").unwrap(); 12 | 13 | let mut threads = Vec::new(); 14 | 15 | for i in 0..10 { 16 | let collection = collection.clone(); 17 | let tx = thread::spawn(move || { 18 | let mut xact = collection.branch_start().unwrap(); 19 | xact.insert(&i.to_string(), i).unwrap(); 20 | 21 | collection.commit(xact).unwrap(); 22 | }); 23 | 24 | threads.push(tx); 25 | } 26 | 27 | for tx in threads { 28 | tx.join().unwrap(); 29 | } 30 | 31 | for i in 0..10 { 32 | let value = collection.get(&i.to_string()).unwrap().unwrap(); 33 | 34 | assert_eq!(value, i); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /tests/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -o errexit 4 | set -o nounset 5 | set -o pipefail 6 | 7 | rm -rf test_data 8 | cargo test $@ 9 | --------------------------------------------------------------------------------