├── .github └── workflows │ └── test.yml ├── .gitignore ├── .hgignore ├── .hgtags ├── .travis.yml ├── Cargo.toml ├── LICENSE ├── README.md ├── bitbucket-pipelines.yml ├── coverage.sh ├── examples ├── asyncdb-async-std │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── asyncdb-tokio │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── build_examples.sh ├── kvserver │ ├── .hgignore │ ├── Cargo.toml │ ├── README.md │ └── src │ │ └── main.rs ├── leveldb-tool │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── mcpe │ ├── Cargo.toml │ ├── README.md │ └── src │ │ └── main.rs ├── stresstest │ ├── Cargo.toml │ └── src │ │ └── main.rs ├── word-analyze │ ├── Cargo.toml │ └── src │ │ └── main.rs └── write-a-lot │ ├── Cargo.toml │ └── src │ └── main.rs └── src ├── asyncdb.rs ├── asyncdb_async_std.rs ├── asyncdb_tokio.rs ├── benches └── maps_bench.rs ├── block.rs ├── block_builder.rs ├── blockhandle.rs ├── cache.rs ├── cmp.rs ├── compressor.rs ├── crc.rs ├── db_impl.rs ├── db_iter.rs ├── disk_env.rs ├── env.rs ├── env_common.rs ├── error.rs ├── filter.rs ├── filter_block.rs ├── infolog.rs ├── key_types.rs ├── lib.rs ├── log.rs ├── mem_env.rs ├── memtable.rs ├── merging_iter.rs ├── options.rs ├── skipmap.rs ├── snapshot.rs ├── table_block.rs ├── table_builder.rs ├── table_cache.rs ├── table_reader.rs ├── test_util.rs ├── types.rs ├── version.rs ├── version_edit.rs ├── version_set.rs └── write_batch.rs /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: Actions CI 4 | 5 | jobs: 6 | build_and_test: 7 | name: leveldb-rs 8 | strategy: 9 | fail-fast: false 10 | matrix: 11 | features: ["async", ""] 12 | platform: [ubuntu-latest, windows-latest, macos-latest] 13 | runs-on: ${{ matrix.platform }} 14 | steps: 15 | - uses: actions/checkout@v2 16 | - uses: actions-rs/toolchain@v1 17 | with: 18 | toolchain: stable 19 | profile: minimal 20 | default: true 21 | - uses: actions-rs/cargo@v1 22 | with: 23 | 
command: test 24 | args: --no-default-features --features=${{ matrix.features }} 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | **/*.rs.bk 3 | **/Cargo.lock -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | syntax: glob 2 | 3 | *.bk 4 | *.orig 5 | *.swp 6 | Cargo.lock 7 | kcov-out 8 | target 9 | 10 | # LevelDB test instances 11 | LOG{,.old} 12 | LOCK 13 | CURRENT 14 | *.ldb 15 | *.log 16 | MANIFEST-* 17 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 37489b2ef580c735608f1fcd8c408e486fee0565 v0.2.7 2 | 8f1a6f7da79d13dc67f3c79283315174a3dd87c6 v0.3.0 3 | 81baad762dca4aeeee1d9cf42af1ef9918bc4373 v0.3.1 4 | 4c54943d52e36171824a94767bd21cff24bd7bfc v0.3.2 5 | faabff9f632099e5cda1a4367a35dc49d74d99a9 v0.3.3 6 | 7b2359303bcca9efab3dbabb25fd734e2ff40e1f v0.3.6 7 | 1a7d1eba51bc318400d2f0542ecf54e33ebca6c9 v1.0 8 | 744663476044a5009134d53ceaa95a2fda620347 v1.0.1 9 | 7575b9ee7b7172fc8e8bd617363c37cd541af1b1 v1.0.2 10 | 12007b450b57ea457541aa2b73a25c663048938c v1.0.3 11 | d819c7d21878d9d0637788d92ffa8078e135aebf v1.0.6 12 | 3593ae567020c6720c3a3f2b63d0f0354e9cec24 v1.0.7 13 | 107595106b55a7a0592b8580df41d683ff92271c v1.0.8 14 | 975674a897c1937f50a9351a0eb5fbb490df4f20 v2.0.0 15 | 685df78c577805afb420a466114fb55315749c10 v3.0.0 16 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | os: 2 | - linux 3 | - osx 4 | # Windows is troublesome and useless 5 | # - windows 6 | dist: bionic 7 | sudo: false 8 | 9 | language: rust 10 | rust: 11 | - stable 12 | - 
nightly 13 | 14 | stages: 15 | - name: test 16 | - name: lint 17 | 18 | install: true 19 | 20 | # Default script is the "test" stage 21 | script: 22 | - cargo build 23 | - cargo test 24 | - examples/build_examples.sh 25 | 26 | jobs: 27 | include: 28 | - stage: lint 29 | if: os = linux 30 | rust: stable 31 | install: 32 | - rustup component add rustfmt 33 | script: 34 | - cargo fmt --all -- --check 35 | 36 | cache: cargo 37 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rusty-leveldb" 3 | version = "3.0.3" 4 | authors = ["Lewin Bormann "] 5 | description = "A compatible re-implementation of LevelDB in Rust" 6 | homepage = "https://github.com/dermesser/leveldb-rs" 7 | repository = "https://github.com/dermesser/leveldb-rs" 8 | readme = "README.md" 9 | keywords = ["LevelDB", "key-value", "database", "SSTable", "Google"] 10 | license = "MIT" 11 | publish = true 12 | edition = "2021" 13 | include = ["src/**/*", "src/*", "Cargo.toml", "LICENSE", "README.md"] 14 | 15 | [lib] 16 | crate-type = ["cdylib", "rlib"] 17 | 18 | [dependencies] 19 | crc32c = "0.6.8" 20 | integer-encoding = "3.0" 21 | rand = "0.8.5" 22 | snap = "1.0" 23 | 24 | errno = { optional = true, version = "0.2" } 25 | fs2 = { optional = true, version = "0.4.3" } 26 | 27 | tokio = { optional = true, features = ["rt", "sync"], version = "1.39.3" } 28 | async-std = { optional = true, version = "1.12.0" } 29 | 30 | [features] 31 | default = ["fs"] 32 | async = ["asyncdb-tokio"] 33 | asyncdb-tokio = ["tokio"] 34 | asyncdb-async-std = ["async-std"] 35 | fs = ["errno", "fs2"] 36 | 37 | [dev-dependencies] 38 | time-test = "0.3" 39 | bencher = "0.1" 40 | 41 | [[bench]] 42 | name = "maps_bench" 43 | harness = false 44 | path = "src/benches/maps_bench.rs" 45 | 46 | [workspace] 47 | members = [ 48 | "examples/write-a-lot", 49 | "examples/leveldb-tool", 50 | 
"examples/word-analyze", 51 | "examples/stresstest", 52 | "examples/asyncdb-tokio", 53 | "examples/asyncdb-async-std", 54 | "examples/mcpe", 55 | "examples/kvserver", 56 | ] 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Lewin Bormann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to 7 | deal in the Software without restriction, including without limitation the 8 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 9 | sell copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 | IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # leveldb-rs 2 | 3 | [![crates.io](https://img.shields.io/crates/v/rusty-leveldb.svg)](https://crates.io/crates/rusty-leveldb) 4 | 5 | A fully compatible implementation of LevelDB in Rust. (any incompatibility is a 6 | bug!) 
7 | 8 | The implementation is very close to the original; often, you can see the same 9 | algorithm translated 1:1, and class (struct) and method names are similar or 10 | the same. 11 | 12 | **NOTE: I do not endorse using this library for any data that you care about.** 13 | I do care, however, about bug reports. 14 | 15 | ## Status 16 | 17 | Working fairly well. Please file an issue if you encounter problems. 18 | 19 | ## Goals 20 | 21 | Some of the goals of this implementation are 22 | 23 | * As few copies of data as possible; most of the time, slices of bytes (`&[u8]`) 24 | are used. Owned memory is represented as `Vec` (and then possibly borrowed 25 | as slice). Zero-copy is not always possible, though, and sometimes simplicity is favored. 26 | * Correctness -- self-checking implementation, good test coverage, etc. Just 27 | like the original implementation. 28 | * Clarity; commented code, clear structure (hopefully doing a better job than 29 | the original implementation). 30 | * Coming close-ish to the original implementation; clarifying the translation of 31 | typical C++ constructs to Rust, and doing a better job at helping understand the internals. 
32 | -------------------------------------------------------------------------------- /bitbucket-pipelines.yml: -------------------------------------------------------------------------------- 1 | image: rustlang/rust:nightly 2 | 3 | pipelines: 4 | branches: 5 | '*': 6 | - step: 7 | caches: 8 | - cargo 9 | - rust-target 10 | script: 11 | - echo "Build project" ; cargo build --release 12 | - echo "Run unit tests" ; cargo test --lib --release -v --no-fail-fast -- --nocapture --test 13 | - echo "Run documentation tests" ; cargo test --doc --release -v --no-fail-fast -- --nocapture --test 14 | definitions: 15 | caches: 16 | cargo: /usr/local/cargo # CARGO_HOME 17 | rust-target: $BITBUCKET_CLONE_DIR/target 18 | 19 | -------------------------------------------------------------------------------- /coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | KCOV=kcov 5 | KCOV_OPTS="--exclude-pattern=/.cargo,/glibc,/usr/lib,/usr/include" 6 | KCOV_OUT="./kcov-out/" 7 | 8 | export RUSTFLAGS="-C link-dead-code" 9 | 10 | TEST_BIN=$(cargo test 2>&1 >/dev/null | awk '/^ Running target\/debug\/deps\// { print $2 }') 11 | 12 | echo $TEST_BIN 13 | ${KCOV} ${KCOV_OPTS} ${KCOV_OUT} ${TEST_BIN} && xdg-open ${KCOV_OUT}/index.html 14 | -------------------------------------------------------------------------------- /examples/asyncdb-async-std/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "asyncdb-async-std" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | async-std = { version = "1.12.0", features = ["attributes"] } 10 | rusty-leveldb = { path = "../../", features = ["asyncdb-async-std"] } 11 | -------------------------------------------------------------------------------- /examples/asyncdb-async-std/src/main.rs: 
-------------------------------------------------------------------------------- 1 | use rusty_leveldb::{AsyncDB, Options, Status, StatusCode}; 2 | 3 | #[async_std::main] 4 | async fn main() { 5 | let adb = AsyncDB::new("testdb", Options::default()).unwrap(); 6 | 7 | adb.put("Hello".as_bytes().to_owned(), "World".as_bytes().to_owned()) 8 | .await 9 | .expect("put()"); 10 | 11 | let r = adb.get("Hello".as_bytes().to_owned()).await; 12 | assert_eq!(r, Ok(Some("World".as_bytes().to_owned()))); 13 | 14 | let snapshot = adb.get_snapshot().await.expect("get_snapshot()"); 15 | 16 | adb.delete("Hello".as_bytes().to_owned()) 17 | .await 18 | .expect("delete()"); 19 | 20 | // A snapshot allows us to travel back in time before the deletion. 21 | let r2 = adb.get_at(snapshot, "Hello".as_bytes().to_owned()).await; 22 | assert_eq!(r2, Ok(Some("World".as_bytes().to_owned()))); 23 | 24 | // Once dropped, a snapshot cannot be used anymore. 25 | adb.drop_snapshot(snapshot).await.expect("drop_snapshot()"); 26 | 27 | let r3 = adb.get_at(snapshot, "Hello".as_bytes().to_owned()).await; 28 | assert_eq!( 29 | r3, 30 | Err(Status { 31 | code: StatusCode::AsyncError, 32 | err: "Unknown snapshot reference: this is a bug".to_string() 33 | }) 34 | ); 35 | 36 | adb.flush().await.expect("flush()"); 37 | adb.close().await.expect("close()"); 38 | } 39 | -------------------------------------------------------------------------------- /examples/asyncdb-tokio/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "asyncdb-tokio" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | tokio = { version = "1.21", features = ["rt", "macros"] } 10 | rusty-leveldb = { path = "../../", features = ["asyncdb-tokio"] } 11 | -------------------------------------------------------------------------------- 
/examples/asyncdb-tokio/src/main.rs: -------------------------------------------------------------------------------- 1 | use tokio::main; 2 | 3 | use rusty_leveldb::{AsyncDB, Options, Status, StatusCode}; 4 | 5 | #[main(flavor = "current_thread")] 6 | async fn main() { 7 | let adb = AsyncDB::new("testdb", Options::default()).unwrap(); 8 | 9 | adb.put("Hello".as_bytes().to_owned(), "World".as_bytes().to_owned()) 10 | .await 11 | .expect("put()"); 12 | 13 | let r = adb.get("Hello".as_bytes().to_owned()).await; 14 | assert_eq!(r, Ok(Some("World".as_bytes().to_owned()))); 15 | 16 | let snapshot = adb.get_snapshot().await.expect("get_snapshot()"); 17 | 18 | adb.delete("Hello".as_bytes().to_owned()) 19 | .await 20 | .expect("delete()"); 21 | 22 | // A snapshot allows us to travel back in time before the deletion. 23 | let r2 = adb.get_at(snapshot, "Hello".as_bytes().to_owned()).await; 24 | assert_eq!(r2, Ok(Some("World".as_bytes().to_owned()))); 25 | 26 | // Once dropped, a snapshot cannot be used anymore. 
27 | adb.drop_snapshot(snapshot).await.expect("drop_snapshot()"); 28 | 29 | let r3 = adb.get_at(snapshot, "Hello".as_bytes().to_owned()).await; 30 | assert_eq!( 31 | r3, 32 | Err(Status { 33 | code: StatusCode::AsyncError, 34 | err: "Unknown snapshot reference: this is a bug".to_string() 35 | }) 36 | ); 37 | 38 | adb.flush().await.expect("flush()"); 39 | adb.close().await.expect("close()"); 40 | } 41 | -------------------------------------------------------------------------------- /examples/build_examples.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | for D in `find examples/ -maxdepth 1 -mindepth 1 -type d`; 6 | do 7 | pushd ${D} 8 | cargo build 9 | popd 10 | done 11 | -------------------------------------------------------------------------------- /examples/kvserver/.hgignore: -------------------------------------------------------------------------------- 1 | ^target/ 2 | -------------------------------------------------------------------------------- /examples/kvserver/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "kvserver" 3 | version = "0.1.0" 4 | authors = ["Lewin Bormann "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | canteen = "0.5" 11 | rusty-leveldb = { path = "../../" } 12 | -------------------------------------------------------------------------------- /examples/kvserver/README.md: -------------------------------------------------------------------------------- 1 | # kvserver 2 | 3 | A simplistic Key/Value HTTP server using rusty-leveldb. 4 | 5 | It is not multi-threaded, and achieves around 13'000 ops per second (both 6 | fetching and storing keys) on my somewhat old `Intel(R) Xeon(R) CPU E5-1650 v2 @ 7 | 3.50GHz` using `ab`. 
8 | 9 | For comparison, writing of random keys directly (in-process) usually happens at 10 | 300'000 keys per second. 11 | -------------------------------------------------------------------------------- /examples/kvserver/src/main.rs: -------------------------------------------------------------------------------- 1 | struct KVService { 2 | db: rusty_leveldb::DB, 3 | } 4 | 5 | static mut STORAGE_SERVICE: Option> = None; 6 | 7 | impl KVService { 8 | fn handle_get(&mut self, req: &canteen::Request) -> canteen::Response { 9 | let key: String = req.get("key"); 10 | 11 | let val = self.db.get(key.as_bytes()); 12 | 13 | let mut rp = canteen::Response::new(); 14 | 15 | rp.set_status(200); 16 | rp.set_content_type("text/plain"); 17 | 18 | if let Some(val) = val { 19 | rp.append(val); 20 | } else { 21 | rp.set_status(404); 22 | } 23 | rp 24 | } 25 | fn handle_put(&mut self, req: &canteen::Request) -> canteen::Response { 26 | let mut rp = canteen::Response::new(); 27 | let key: String = req.get("key"); 28 | let val = &req.payload; 29 | 30 | self.db.put(key.as_bytes(), val.as_ref()).unwrap(); 31 | 32 | rp.set_status(200); 33 | rp.set_content_type("text/plain"); 34 | rp 35 | } 36 | } 37 | 38 | fn get_key_fn(rq: &canteen::Request) -> canteen::Response { 39 | unsafe { 40 | STORAGE_SERVICE 41 | .as_ref() 42 | .unwrap() 43 | .lock() 44 | .unwrap() 45 | .handle_get(rq) 46 | } 47 | } 48 | 49 | fn put_key_fn(rq: &canteen::Request) -> canteen::Response { 50 | unsafe { 51 | STORAGE_SERVICE 52 | .as_ref() 53 | .unwrap() 54 | .lock() 55 | .unwrap() 56 | .handle_put(rq) 57 | } 58 | } 59 | 60 | fn main() { 61 | let db = rusty_leveldb::DB::open("httpdb", rusty_leveldb::Options::default()).unwrap(); 62 | let service = KVService { db }; 63 | unsafe { STORAGE_SERVICE = Some(std::sync::Mutex::new(service)) }; 64 | 65 | let mut ct = canteen::Canteen::new(); 66 | ct.add_route("/kvs/get/", &[canteen::Method::Get], get_key_fn); 67 | ct.add_route( 68 | "/kvs/put/", 69 | &[canteen::Method::Put, 
canteen::Method::Post], 70 | put_key_fn, 71 | ); 72 | ct.bind("0.0.0.0:8080"); 73 | ct.run() 74 | } 75 | -------------------------------------------------------------------------------- /examples/leveldb-tool/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "leveldb-tool" 3 | version = "0.1.0" 4 | authors = ["Lewin Bormann "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | rusty-leveldb = { path = "../../" } 9 | -------------------------------------------------------------------------------- /examples/leveldb-tool/src/main.rs: -------------------------------------------------------------------------------- 1 | extern crate rusty_leveldb; 2 | 3 | use rusty_leveldb::{compressor, CompressorId, LdbIterator, Options, DB}; 4 | 5 | use std::env::args; 6 | use std::io::{self, Write}; 7 | use std::iter::FromIterator; 8 | 9 | fn get(db: &mut DB, k: &str) { 10 | match db.get(k.as_bytes()) { 11 | Some(v) => { 12 | if let Ok(s) = String::from_utf8(v.clone()) { 13 | eprintln!("{} => {}", k, s); 14 | } else { 15 | eprintln!("{} => {:?}", k, v); 16 | } 17 | } 18 | None => eprintln!("{} => ", k), 19 | } 20 | } 21 | 22 | fn put(db: &mut DB, k: &str, v: &str) { 23 | db.put(k.as_bytes(), v.as_bytes()).unwrap(); 24 | db.flush().unwrap(); 25 | } 26 | 27 | fn delete(db: &mut DB, k: &str) { 28 | db.delete(k.as_bytes()).unwrap(); 29 | db.flush().unwrap(); 30 | } 31 | 32 | fn iter(db: &mut DB) { 33 | let mut it = db.new_iter().unwrap(); 34 | let (mut k, mut v) = (vec![], vec![]); 35 | let mut out = io::BufWriter::new(io::stdout()); 36 | while it.advance() { 37 | it.current(&mut k, &mut v); 38 | out.write_all(&k).unwrap(); 39 | out.write_all(b" => ").unwrap(); 40 | out.write_all(&v).unwrap(); 41 | out.write_all(b"\n").unwrap(); 42 | } 43 | } 44 | 45 | fn compact(db: &mut DB, from: &str, to: &str) { 46 | db.compact_range(from.as_bytes(), to.as_bytes()).unwrap(); 47 | } 48 | 49 | fn main() { 50 | let args = 
Vec::from_iter(args()); 51 | 52 | if args.len() < 2 { 53 | panic!( 54 | "Usage: {} [get|put/set|delete|iter|compact] [key|from] [val|to]", 55 | args[0] 56 | ); 57 | } 58 | 59 | let opt = Options { 60 | reuse_logs: false, 61 | reuse_manifest: false, 62 | compressor: compressor::SnappyCompressor::ID, 63 | ..Default::default() 64 | }; 65 | let mut db = DB::open("tooldb", opt).unwrap(); 66 | 67 | match args[1].as_str() { 68 | "get" => { 69 | if args.len() < 3 { 70 | panic!("Usage: {} get key", args[0]); 71 | } 72 | get(&mut db, &args[2]); 73 | } 74 | "put" | "set" => { 75 | if args.len() < 4 { 76 | panic!("Usage: {} put key val", args[0]); 77 | } 78 | put(&mut db, &args[2], &args[3]); 79 | } 80 | "delete" => { 81 | if args.len() < 3 { 82 | panic!("Usage: {} delete key", args[0]); 83 | } 84 | delete(&mut db, &args[2]); 85 | } 86 | "iter" => iter(&mut db), 87 | "compact" => { 88 | if args.len() < 4 { 89 | panic!("Usage: {} compact from to", args[0]); 90 | } 91 | compact(&mut db, &args[2], &args[3]); 92 | } 93 | _ => unimplemented!(), 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /examples/mcpe/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "mcpe" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | miniz_oxide = "0.7.1" 8 | rusty-leveldb = { path = "../../" } 9 | -------------------------------------------------------------------------------- /examples/mcpe/README.md: -------------------------------------------------------------------------------- 1 | # MCPE 2 | 3 | This example show how to customize compression method. 4 | 5 | This setup is compatible to [Mojang's leveldb](https://github.com/Mojang/leveldb-mcpe). 
-------------------------------------------------------------------------------- /examples/mcpe/src/main.rs: -------------------------------------------------------------------------------- 1 | use miniz_oxide::deflate::{compress_to_vec, compress_to_vec_zlib, CompressionLevel}; 2 | use miniz_oxide::inflate::{decompress_to_vec, decompress_to_vec_zlib}; 3 | use rusty_leveldb::compressor::NoneCompressor; 4 | use rusty_leveldb::{Compressor, CompressorList, Options, DB}; 5 | use std::rc::Rc; 6 | 7 | /// A zlib compressor that with zlib wrapper 8 | /// 9 | /// This is use for old world format 10 | struct ZlibCompressor(u8); 11 | 12 | impl ZlibCompressor { 13 | /// compression level 0-10 14 | pub fn new(level: u8) -> Self { 15 | assert!(level <= 10); 16 | Self(level) 17 | } 18 | } 19 | 20 | impl Compressor for ZlibCompressor { 21 | fn encode(&self, block: Vec) -> rusty_leveldb::Result> { 22 | Ok(compress_to_vec_zlib(&block, self.0)) 23 | } 24 | 25 | fn decode(&self, block: Vec) -> rusty_leveldb::Result> { 26 | decompress_to_vec_zlib(&block).map_err(|e| rusty_leveldb::Status { 27 | code: rusty_leveldb::StatusCode::CompressionError, 28 | err: e.to_string(), 29 | }) 30 | } 31 | } 32 | 33 | /// A zlib compressor that without zlib wrapper 34 | /// 35 | /// > windowBits can also be –8..–15 for raw deflate. In this case, -windowBits determines the window size. deflate() will then generate raw deflate data with no zlib header or trailer, and will not compute a check value. 36 | /// > 37 | /// > From [zlib manual](https://zlib.net/manual.html) 38 | /// 39 | /// It seems like Mojang use this for newer version 40 | /// 41 | /// A copy of Mojang's implementation can be find [here](https://github.com/reedacartwright/rbedrock/blob/fb32a899da4e15c1aaa0d6de2b459e914e183516/src/leveldb-mcpe/db/zlib_compressor.cc#L119). 
42 | struct RawZlibCompressor(u8); 43 | 44 | impl RawZlibCompressor { 45 | /// compression level 0-10 46 | pub fn new(level: u8) -> Self { 47 | assert!(level <= 10); 48 | Self(level) 49 | } 50 | } 51 | 52 | impl Compressor for RawZlibCompressor { 53 | fn encode(&self, block: Vec) -> rusty_leveldb::Result> { 54 | Ok(compress_to_vec(&block, self.0)) 55 | } 56 | 57 | fn decode(&self, block: Vec) -> rusty_leveldb::Result> { 58 | decompress_to_vec(&block).map_err(|e| rusty_leveldb::Status { 59 | code: rusty_leveldb::StatusCode::CompressionError, 60 | err: e.to_string(), 61 | }) 62 | } 63 | } 64 | 65 | pub fn mcpe_options(compression_level: u8) -> Options { 66 | let mut opt = Options::default(); 67 | 68 | // Mojang create a custom [compressor list](https://github.com/reedacartwright/rbedrock/blob/fb32a899da4e15c1aaa0d6de2b459e914e183516/src/leveldb-mcpe/include/leveldb/options.h#L123) 69 | // Sample config for compressor list can be find in [here](https://github.com/reedacartwright/rbedrock/blob/fb32a899da4e15c1aaa0d6de2b459e914e183516/src/leveldb-mcpe/mcpe_sample_setup.cpp#L24-L28) 70 | // 71 | // Their compression id can be find in [here](https://github.com/reedacartwright/rbedrock/blob/fb32a899da4e15c1aaa0d6de2b459e914e183516/src/leveldb-mcpe/include/leveldb/zlib_compressor.h#L38) 72 | // and [here](https://github.com/reedacartwright/rbedrock/blob/fb32a899da4e15c1aaa0d6de2b459e914e183516/src/leveldb-mcpe/include/leveldb/zlib_compressor.h#L48) 73 | // 74 | // Compression id will be use in [here](https://github.com/reedacartwright/rbedrock/blob/fb32a899da4e15c1aaa0d6de2b459e914e183516/src/leveldb-mcpe/table/format.cc#L125-L150) 75 | let mut list = CompressorList::new(); 76 | list.set_with_id(0, NoneCompressor {}); 77 | list.set_with_id(2, ZlibCompressor::new(compression_level)); 78 | list.set_with_id(4, RawZlibCompressor::new(compression_level)); 79 | opt.compressor_list = Rc::new(list); 80 | 81 | // Set compressor 82 | // Minecraft bedrock may use other id than 4 
however default is 4. [Mojang's implementation](https://github.com/reedacartwright/rbedrock/blob/fb32a899da4e15c1aaa0d6de2b459e914e183516/src/leveldb-mcpe/table/table_builder.cc#L152) 83 | // 84 | // There is a bug in this library that you have to open a database with the same compression type as it was written to. 85 | // If raw data is smaller than compression, Mojang will use raw data. [Mojang's implementation](https://github.com/reedacartwright/rbedrock/blob/fb32a899da4e15c1aaa0d6de2b459e914e183516/src/leveldb-mcpe/table/table_builder.cc#L155-L165) 86 | // There is a small chance that compression id 0 exists, you should use compression id 0 to write it. 87 | opt.compressor = 4; 88 | 89 | opt 90 | } 91 | 92 | /// Path to world's db folder 93 | const PATH: &str = "mcpe_db"; 94 | 95 | /// Mojang use `DefaultLevel` for world compression 96 | const COMPRESSION_LEVEL: u8 = CompressionLevel::DefaultLevel as u8; 97 | 98 | fn main() { 99 | let opt = mcpe_options(COMPRESSION_LEVEL); 100 | let mut db = DB::open(PATH, opt).unwrap(); 101 | db.put(b"~local_player", b"NBT data goes here").unwrap(); 102 | let value = db.get(b"~local_player").unwrap(); 103 | assert_eq!(&value, b"NBT data goes here") 104 | } 105 | -------------------------------------------------------------------------------- /examples/stresstest/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "stresstest" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | rand = "0.8.5" 10 | 11 | rusty-leveldb = { path = "../../" } 12 | time-test = "0.2.3" 13 | -------------------------------------------------------------------------------- /examples/stresstest/src/main.rs: -------------------------------------------------------------------------------- 1 | use rand::distributions::{Alphanumeric, DistString}; 2 | use 
rusty_leveldb::{compressor, CompressorId, Options, DB}; 3 | 4 | const KEY_LEN: usize = 5; 5 | const VAL_LEN: usize = 8; 6 | 7 | fn gen_string(n: usize) -> String { 8 | Alphanumeric 9 | .sample_string(&mut rand::thread_rng(), n) 10 | .to_lowercase() 11 | } 12 | 13 | fn write(db: &mut DB, n: usize) { 14 | time_test::time_test!("write"); 15 | for i in 0..n { 16 | let (k, v) = (gen_string(KEY_LEN), gen_string(VAL_LEN)); 17 | 18 | db.put(k.as_bytes(), v.as_bytes()).unwrap(); 19 | if i % (n / 100) == 0 { 20 | println!("{}/100 ...", i * 100 / n); 21 | db.flush().unwrap(); 22 | } 23 | } 24 | 25 | { 26 | time_test::time_test!("write-flush"); 27 | db.flush().unwrap(); 28 | } 29 | } 30 | 31 | fn read(db: &mut DB, n: usize) -> usize { 32 | let mut succ = 0; 33 | time_test::time_test!("read"); 34 | for _ in 0..n { 35 | let k = gen_string(KEY_LEN); 36 | 37 | if db.get(k.as_bytes()).is_some() { 38 | succ += 1; 39 | } 40 | } 41 | succ 42 | } 43 | 44 | fn main() { 45 | let n = 1_000_000; 46 | let m = 10; 47 | let path = "stresstestdb"; 48 | let mut entries = 0; 49 | 50 | for i in 0..m { 51 | let opt = Options { 52 | compressor: compressor::SnappyCompressor::ID, 53 | ..Default::default() 54 | }; 55 | let mut db = DB::open(path, opt).unwrap(); 56 | write(&mut db, n); 57 | entries += n; 58 | println!("Wrote {} entries ({}/{})", entries, i + 1, m); 59 | 60 | let s = read(&mut db, n); 61 | println!("Read back {} entries (found {}) ({}/{})", n, s, i + 1, m); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /examples/word-analyze/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "word-analyze" 3 | version = "0.1.0" 4 | authors = ["Lewin Bormann "] 5 | edition = "2018" 6 | 7 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 8 | 9 | [dependencies] 10 | rusty-leveldb = { path = "../../" } 11 | integer-encoding = "1" 12 | 
-------------------------------------------------------------------------------- /examples/word-analyze/src/main.rs: -------------------------------------------------------------------------------- 1 | use leveldb::CompressorId; 2 | use rusty_leveldb as leveldb; 3 | 4 | use std::fs::OpenOptions; 5 | use std::io::{self, BufRead}; 6 | use std::path::Path; 7 | 8 | fn update_count(w: &str, db: &mut leveldb::DB) -> Option<()> { 9 | let mut count: usize = 0; 10 | if let Some(v) = db.get(w.as_bytes()) { 11 | let s = String::from_utf8(v).unwrap(); 12 | count = s.parse::().unwrap(); 13 | } 14 | count += 1; 15 | let s = count.to_string(); 16 | db.put(w.as_bytes(), s.as_bytes()).unwrap(); 17 | Some(()) 18 | } 19 | 20 | fn run(mut db: leveldb::DB) -> io::Result<()> { 21 | let files = std::env::args().skip(1); 22 | 23 | for f in files { 24 | let f = OpenOptions::new().read(true).open(Path::new(&f))?; 25 | for line in io::BufReader::new(f).lines() { 26 | for word in line.unwrap().split_whitespace() { 27 | let mut word = word.to_ascii_lowercase(); 28 | word.retain(|c| c.is_ascii_alphanumeric()); 29 | update_count(&word, &mut db); 30 | } 31 | } 32 | } 33 | 34 | Ok(()) 35 | } 36 | 37 | fn main() { 38 | let opts = leveldb::Options { 39 | compressor: leveldb::compressor::NoneCompressor::ID, 40 | ..Default::default() 41 | }; 42 | let db = leveldb::DB::open("wordsdb", opts).unwrap(); 43 | 44 | run(db).unwrap(); 45 | } 46 | -------------------------------------------------------------------------------- /examples/write-a-lot/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "write-a-lot" 3 | version = "0.1.0" 4 | authors = ["Lewin Bormann "] 5 | edition = "2021" 6 | 7 | [dependencies] 8 | rusty-leveldb = { path = "../../" } 9 | rand = "0.3" 10 | -------------------------------------------------------------------------------- /examples/write-a-lot/src/main.rs: 
-------------------------------------------------------------------------------- 1 | extern crate rand; 2 | extern crate rusty_leveldb; 3 | 4 | use rand::Rng; 5 | use rusty_leveldb::{compressor, CompressorId, Options, DB}; 6 | 7 | use std::error::Error; 8 | use std::iter::FromIterator; 9 | 10 | const KEY_LEN: usize = 16; 11 | const VAL_LEN: usize = 48; 12 | 13 | fn gen_string(len: usize) -> String { 14 | let mut rng = rand::thread_rng(); 15 | String::from_iter(rng.gen_ascii_chars().take(len)) 16 | } 17 | 18 | fn fill_db(db: &mut DB, entries: usize) -> Result<(), Box> { 19 | for i in 0..entries { 20 | let (k, v) = (gen_string(KEY_LEN), gen_string(VAL_LEN)); 21 | db.put(k.as_bytes(), v.as_bytes())?; 22 | 23 | if i % 100 == 0 { 24 | db.flush()?; 25 | } 26 | } 27 | Ok(()) 28 | } 29 | 30 | fn main() { 31 | let opt = Options { 32 | compressor: compressor::SnappyCompressor::ID, 33 | ..Default::default() 34 | }; 35 | let mut db = DB::open("test1", opt).unwrap(); 36 | 37 | fill_db(&mut db, 32768).unwrap(); 38 | 39 | db.close().unwrap(); 40 | } 41 | -------------------------------------------------------------------------------- /src/asyncdb.rs: -------------------------------------------------------------------------------- 1 | use std::collections::hash_map::HashMap; 2 | 3 | use crate::{ 4 | send_response, send_response_result, AsyncDB, Message, Result, Status, StatusCode, WriteBatch, 5 | DB, 6 | }; 7 | 8 | pub(crate) const CHANNEL_BUFFER_SIZE: usize = 32; 9 | 10 | #[derive(Clone, Copy)] 11 | pub struct SnapshotRef(usize); 12 | 13 | /// A request sent to the database thread. 14 | pub(crate) enum Request { 15 | Close, 16 | Put { key: Vec, val: Vec }, 17 | Delete { key: Vec }, 18 | Write { batch: WriteBatch, sync: bool }, 19 | Flush, 20 | GetAt { snapshot: SnapshotRef, key: Vec }, 21 | Get { key: Vec }, 22 | GetSnapshot, 23 | DropSnapshot { snapshot: SnapshotRef }, 24 | CompactRange { from: Vec, to: Vec }, 25 | } 26 | 27 | /// A response received from the database thread. 
28 | pub(crate) enum Response { 29 | OK, 30 | Error(Status), 31 | Value(Option>), 32 | Snapshot(SnapshotRef), 33 | } 34 | 35 | impl AsyncDB { 36 | pub async fn close(&self) -> Result<()> { 37 | let r = self.process_request(Request::Close).await?; 38 | match r { 39 | Response::OK => Ok(()), 40 | Response::Error(s) => Err(s), 41 | _ => Err(Status { 42 | code: StatusCode::AsyncError, 43 | err: "Wrong response type in AsyncDB.".to_string(), 44 | }), 45 | } 46 | } 47 | 48 | pub async fn put(&self, key: Vec, val: Vec) -> Result<()> { 49 | let r = self.process_request(Request::Put { key, val }).await?; 50 | match r { 51 | Response::OK => Ok(()), 52 | Response::Error(s) => Err(s), 53 | _ => Err(Status { 54 | code: StatusCode::AsyncError, 55 | err: "Wrong response type in AsyncDB.".to_string(), 56 | }), 57 | } 58 | } 59 | pub async fn delete(&self, key: Vec) -> Result<()> { 60 | let r = self.process_request(Request::Delete { key }).await?; 61 | match r { 62 | Response::OK => Ok(()), 63 | Response::Error(s) => Err(s), 64 | _ => Err(Status { 65 | code: StatusCode::AsyncError, 66 | err: "Wrong response type in AsyncDB.".to_string(), 67 | }), 68 | } 69 | } 70 | pub async fn write(&self, batch: WriteBatch, sync: bool) -> Result<()> { 71 | let r = self.process_request(Request::Write { batch, sync }).await?; 72 | match r { 73 | Response::OK => Ok(()), 74 | Response::Error(s) => Err(s), 75 | _ => Err(Status { 76 | code: StatusCode::AsyncError, 77 | err: "Wrong response type in AsyncDB.".to_string(), 78 | }), 79 | } 80 | } 81 | pub async fn flush(&self) -> Result<()> { 82 | let r = self.process_request(Request::Flush).await?; 83 | match r { 84 | Response::OK => Ok(()), 85 | Response::Error(s) => Err(s), 86 | _ => Err(Status { 87 | code: StatusCode::AsyncError, 88 | err: "Wrong response type in AsyncDB.".to_string(), 89 | }), 90 | } 91 | } 92 | pub async fn get(&self, key: Vec) -> Result>> { 93 | let r = self.process_request(Request::Get { key }).await?; 94 | match r { 95 | 
Response::Value(v) => Ok(v), 96 | Response::Error(s) => Err(s), 97 | _ => Err(Status { 98 | code: StatusCode::AsyncError, 99 | err: "Wrong response type in AsyncDB.".to_string(), 100 | }), 101 | } 102 | } 103 | pub async fn get_at(&self, snapshot: SnapshotRef, key: Vec) -> Result>> { 104 | let r = self 105 | .process_request(Request::GetAt { snapshot, key }) 106 | .await?; 107 | match r { 108 | Response::Value(v) => Ok(v), 109 | Response::Error(s) => Err(s), 110 | _ => Err(Status { 111 | code: StatusCode::AsyncError, 112 | err: "Wrong response type in AsyncDB.".to_string(), 113 | }), 114 | } 115 | } 116 | pub async fn get_snapshot(&self) -> Result { 117 | let r = self.process_request(Request::GetSnapshot).await?; 118 | match r { 119 | Response::Snapshot(sr) => Ok(sr), 120 | _ => Err(Status { 121 | code: StatusCode::AsyncError, 122 | err: "Wrong response type in AsyncDB.".to_string(), 123 | }), 124 | } 125 | } 126 | /// As snapshots returned by `AsyncDB::get_snapshot()` are sort-of "weak references" to an 127 | /// actual snapshot, they need to be dropped explicitly. 
128 | pub async fn drop_snapshot(&self, snapshot: SnapshotRef) -> Result<()> { 129 | let r = self 130 | .process_request(Request::DropSnapshot { snapshot }) 131 | .await?; 132 | match r { 133 | Response::OK => Ok(()), 134 | _ => Err(Status { 135 | code: StatusCode::AsyncError, 136 | err: "Wrong response type in AsyncDB.".to_string(), 137 | }), 138 | } 139 | } 140 | pub async fn compact_range(&self, from: Vec, to: Vec) -> Result<()> { 141 | let r = self 142 | .process_request(Request::CompactRange { from, to }) 143 | .await?; 144 | match r { 145 | Response::OK => Ok(()), 146 | Response::Error(s) => Err(s), 147 | _ => Err(Status { 148 | code: StatusCode::AsyncError, 149 | err: "Wrong response type in AsyncDB.".to_string(), 150 | }), 151 | } 152 | } 153 | 154 | pub(crate) fn run_server(mut db: DB, mut recv: impl ReceiverExt) { 155 | let mut snapshots = HashMap::new(); 156 | let mut snapshot_counter: usize = 0; 157 | 158 | while let Some(message) = recv.blocking_recv() { 159 | match message.req { 160 | Request::Close => { 161 | send_response(message.resp_channel, Response::OK); 162 | recv.close(); 163 | return; 164 | } 165 | Request::Put { key, val } => { 166 | let ok = db.put(&key, &val); 167 | send_response_result(message.resp_channel, ok); 168 | } 169 | Request::Delete { key } => { 170 | let ok = db.delete(&key); 171 | send_response_result(message.resp_channel, ok); 172 | } 173 | Request::Write { batch, sync } => { 174 | let ok = db.write(batch, sync); 175 | send_response_result(message.resp_channel, ok); 176 | } 177 | Request::Flush => { 178 | let ok = db.flush(); 179 | send_response_result(message.resp_channel, ok); 180 | } 181 | Request::GetAt { snapshot, key } => { 182 | let snapshot_id = snapshot.0; 183 | if let Some(snapshot) = snapshots.get(&snapshot_id) { 184 | let ok = db.get_at(snapshot, &key); 185 | match ok { 186 | Err(e) => { 187 | send_response(message.resp_channel, Response::Error(e)); 188 | } 189 | Ok(v) => { 190 | send_response(message.resp_channel, 
Response::Value(v)); 191 | } 192 | }; 193 | } else { 194 | send_response( 195 | message.resp_channel, 196 | Response::Error(Status { 197 | code: StatusCode::AsyncError, 198 | err: "Unknown snapshot reference: this is a bug".to_string(), 199 | }), 200 | ); 201 | } 202 | } 203 | Request::Get { key } => { 204 | let r = db.get(&key); 205 | send_response(message.resp_channel, Response::Value(r)); 206 | } 207 | Request::GetSnapshot => { 208 | snapshots.insert(snapshot_counter, db.get_snapshot()); 209 | let sref = SnapshotRef(snapshot_counter); 210 | snapshot_counter += 1; 211 | send_response(message.resp_channel, Response::Snapshot(sref)); 212 | } 213 | Request::DropSnapshot { snapshot } => { 214 | snapshots.remove(&snapshot.0); 215 | send_response_result(message.resp_channel, Ok(())); 216 | } 217 | Request::CompactRange { from, to } => { 218 | let ok = db.compact_range(&from, &to); 219 | send_response_result(message.resp_channel, ok); 220 | } 221 | } 222 | } 223 | } 224 | } 225 | 226 | pub(crate) trait ReceiverExt { 227 | fn blocking_recv(&mut self) -> Option; 228 | fn close(&mut self); 229 | } 230 | -------------------------------------------------------------------------------- /src/asyncdb_async_std.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | use std::sync::Arc; 3 | 4 | use async_std::channel; 5 | use async_std::task::{spawn_blocking, JoinHandle}; 6 | 7 | use crate::asyncdb::{ReceiverExt, Request, Response, CHANNEL_BUFFER_SIZE}; 8 | use crate::{Options, Result, Status, StatusCode, DB}; 9 | 10 | pub(crate) struct Message { 11 | pub(crate) req: Request, 12 | pub(crate) resp_channel: channel::Sender, 13 | } 14 | /// `AsyncDB` makes it easy to use LevelDB in a async-std runtime. 15 | /// The methods follow very closely the main API (see `DB` type). Iteration is not yet implemented. 
16 | #[derive(Clone)] 17 | pub struct AsyncDB { 18 | jh: Arc>, 19 | send: channel::Sender, 20 | } 21 | 22 | impl AsyncDB { 23 | /// Create a new or open an existing database. 24 | pub fn new>(name: P, opts: Options) -> Result { 25 | let db = DB::open(name, opts)?; 26 | 27 | let (send, recv) = channel::bounded(CHANNEL_BUFFER_SIZE); 28 | let jh = spawn_blocking(move || AsyncDB::run_server(db, recv)); 29 | Ok(AsyncDB { 30 | jh: Arc::new(jh), 31 | send, 32 | }) 33 | } 34 | 35 | pub(crate) async fn process_request(&self, req: Request) -> Result { 36 | let (tx, rx) = channel::bounded(1); 37 | 38 | let m = Message { 39 | req, 40 | resp_channel: tx, 41 | }; 42 | if let Err(e) = self.send.send(m).await { 43 | return Err(Status { 44 | code: StatusCode::AsyncError, 45 | err: e.to_string(), 46 | }); 47 | } 48 | let resp = rx.recv().await; 49 | match resp { 50 | Err(e) => Err(Status { 51 | code: StatusCode::AsyncError, 52 | err: e.to_string(), 53 | }), 54 | Ok(r) => Ok(r), 55 | } 56 | } 57 | } 58 | 59 | pub(crate) fn send_response_result(ch: channel::Sender, result: Result<()>) { 60 | if let Err(e) = result { 61 | ch.try_send(Response::Error(e)).ok(); 62 | } else { 63 | ch.try_send(Response::OK).ok(); 64 | } 65 | } 66 | 67 | pub(crate) fn send_response(ch: channel::Sender, res: Response) { 68 | ch.send_blocking(res).ok(); 69 | } 70 | 71 | impl ReceiverExt for channel::Receiver { 72 | fn blocking_recv(&mut self) -> Option { 73 | self.recv_blocking().ok() 74 | } 75 | 76 | fn close(&mut self) { 77 | channel::Receiver::close(self); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/asyncdb_tokio.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | use std::sync::Arc; 3 | 4 | use tokio::sync::mpsc; 5 | use tokio::sync::oneshot; 6 | use tokio::task::{spawn_blocking, JoinHandle}; 7 | 8 | use crate::asyncdb::ReceiverExt; 9 | use crate::asyncdb::CHANNEL_BUFFER_SIZE; 10 | 
use crate::asyncdb::{Request, Response}; 11 | use crate::{Options, Result, Status, StatusCode, DB}; 12 | 13 | pub(crate) struct Message { 14 | pub(crate) req: Request, 15 | pub(crate) resp_channel: oneshot::Sender, 16 | } 17 | 18 | /// `AsyncDB` makes it easy to use LevelDB in a tokio runtime. 19 | /// The methods follow very closely the main API (see `DB` type). Iteration is not yet implemented. 20 | #[derive(Clone)] 21 | pub struct AsyncDB { 22 | jh: Arc>, 23 | send: mpsc::Sender, 24 | } 25 | 26 | impl AsyncDB { 27 | /// Create a new or open an existing database. 28 | pub fn new>(name: P, opts: Options) -> Result { 29 | let db = DB::open(name, opts)?; 30 | let (send, recv) = mpsc::channel(CHANNEL_BUFFER_SIZE); 31 | 32 | let jh = spawn_blocking(move || AsyncDB::run_server(db, recv)); 33 | Ok(AsyncDB { 34 | jh: Arc::new(jh), 35 | send, 36 | }) 37 | } 38 | pub(crate) async fn process_request(&self, req: Request) -> Result { 39 | let (tx, rx) = oneshot::channel(); 40 | 41 | let m = Message { 42 | req, 43 | resp_channel: tx, 44 | }; 45 | if let Err(e) = self.send.send(m).await { 46 | return Err(Status { 47 | code: StatusCode::AsyncError, 48 | err: e.to_string(), 49 | }); 50 | } 51 | let resp = rx.await; 52 | 53 | match resp { 54 | Err(e) => Err(Status { 55 | code: StatusCode::AsyncError, 56 | err: e.to_string(), 57 | }), 58 | Ok(r) => Ok(r), 59 | } 60 | } 61 | } 62 | 63 | pub(crate) fn send_response_result(ch: oneshot::Sender, result: Result<()>) { 64 | if let Err(e) = result { 65 | ch.send(Response::Error(e)).ok(); 66 | } else { 67 | ch.send(Response::OK).ok(); 68 | } 69 | } 70 | 71 | pub(crate) fn send_response(ch: oneshot::Sender, res: Response) { 72 | ch.send(res).ok(); 73 | } 74 | 75 | impl ReceiverExt for mpsc::Receiver { 76 | fn blocking_recv(&mut self) -> Option { 77 | self.blocking_recv() 78 | } 79 | 80 | fn close(&mut self) { 81 | mpsc::Receiver::close(self); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- 
/src/benches/maps_bench.rs: -------------------------------------------------------------------------------- 1 | //! Compare different implementations of bytestring->bytestring maps. This is built as separate 2 | //! binary. 3 | 4 | #[macro_use] 5 | extern crate bencher; 6 | extern crate rand; 7 | extern crate rusty_leveldb; 8 | 9 | use bencher::Bencher; 10 | use rand::Rng; 11 | 12 | use rusty_leveldb::DefaultCmp; 13 | use rusty_leveldb::SkipMap; 14 | 15 | use std::collections::BTreeMap; 16 | use std::collections::HashMap; 17 | use std::rc::Rc; 18 | 19 | fn gen_key_val(gen: &mut R, keylen: usize, vallen: usize) -> (Vec, Vec) { 20 | let mut key = Vec::with_capacity(keylen); 21 | let mut val = Vec::with_capacity(vallen); 22 | 23 | for _i in 0..keylen { 24 | key.push(gen.gen_range(b'a'..=b'z')); 25 | } 26 | for _i in 0..vallen { 27 | val.push(gen.gen_range(b'a'..=b'z')); 28 | } 29 | (key, val) 30 | } 31 | 32 | fn bench_gen_key_val(b: &mut Bencher) { 33 | let mut gen = rand::thread_rng(); 34 | b.iter(|| { 35 | let (k, _v) = gen_key_val(&mut gen, 10, 10); 36 | k.len(); 37 | }); 38 | } 39 | 40 | fn bench_skipmap_insert(b: &mut Bencher) { 41 | let mut gen = rand::thread_rng(); 42 | 43 | let mut skm = SkipMap::new(Rc::new(Box::new(DefaultCmp))); 44 | 45 | b.iter(|| { 46 | let (mut k, v) = gen_key_val(&mut gen, 10, 10); 47 | skm.insert(k.clone(), v.clone()); 48 | k[9] += 1; 49 | skm.insert(k.clone(), v.clone()); 50 | k[9] += 1; 51 | skm.insert(k.clone(), v.clone()); 52 | k[9] += 1; 53 | skm.insert(k.clone(), v.clone()); 54 | k[9] += 1; 55 | skm.insert(k.clone(), v.clone()); 56 | k[9] += 1; 57 | skm.insert(k.clone(), v.clone()); 58 | k[9] += 1; 59 | skm.insert(k.clone(), v.clone()); 60 | k[9] += 1; 61 | skm.insert(k.clone(), v.clone()); 62 | k[9] += 1; 63 | skm.insert(k.clone(), v.clone()); 64 | k[9] += 1; 65 | skm.insert(k, v); 66 | }); 67 | } 68 | 69 | fn bench_hashmap_insert(b: &mut Bencher) { 70 | let mut gen = rand::thread_rng(); 71 | let mut hm = HashMap::new(); 72 | 
73 | b.iter(|| { 74 | let (mut k, v) = gen_key_val(&mut gen, 10, 10); 75 | hm.insert(k.clone(), v.clone()); 76 | k[9] += 1; 77 | hm.insert(k.clone(), v.clone()); 78 | k[9] += 1; 79 | hm.insert(k.clone(), v.clone()); 80 | k[9] += 1; 81 | hm.insert(k.clone(), v.clone()); 82 | k[9] += 1; 83 | hm.insert(k.clone(), v.clone()); 84 | k[9] += 1; 85 | hm.insert(k.clone(), v.clone()); 86 | k[9] += 1; 87 | hm.insert(k.clone(), v.clone()); 88 | k[9] += 1; 89 | hm.insert(k.clone(), v.clone()); 90 | k[9] += 1; 91 | hm.insert(k.clone(), v.clone()); 92 | k[9] += 1; 93 | hm.insert(k, v); 94 | }); 95 | } 96 | 97 | fn bench_btree_insert(b: &mut Bencher) { 98 | let mut gen = rand::thread_rng(); 99 | let mut btm = BTreeMap::new(); 100 | 101 | b.iter(|| { 102 | let (mut k, v) = gen_key_val(&mut gen, 10, 10); 103 | btm.insert(k.clone(), v.clone()); 104 | k[9] += 1; 105 | btm.insert(k.clone(), v.clone()); 106 | k[9] += 1; 107 | btm.insert(k.clone(), v.clone()); 108 | k[9] += 1; 109 | btm.insert(k.clone(), v.clone()); 110 | k[9] += 1; 111 | btm.insert(k.clone(), v.clone()); 112 | k[9] += 1; 113 | btm.insert(k.clone(), v.clone()); 114 | k[9] += 1; 115 | btm.insert(k.clone(), v.clone()); 116 | k[9] += 1; 117 | btm.insert(k.clone(), v.clone()); 118 | k[9] += 1; 119 | btm.insert(k.clone(), v.clone()); 120 | k[9] += 1; 121 | btm.insert(k, v); 122 | }); 123 | } 124 | 125 | benchmark_group!( 126 | basic, 127 | bench_gen_key_val, 128 | bench_skipmap_insert, 129 | bench_hashmap_insert, 130 | bench_btree_insert, 131 | ); 132 | benchmark_main!(basic); 133 | -------------------------------------------------------------------------------- /src/block_builder.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::Ordering; 2 | 3 | use crate::block::BlockContents; 4 | use crate::options::Options; 5 | 6 | use integer_encoding::{FixedIntWriter, VarIntWriter}; 7 | 8 | /// BlockBuilder contains functionality for building a block consisting of consecutive 
key-value 9 | /// entries. 10 | pub struct BlockBuilder { 11 | opt: Options, 12 | buffer: Vec, 13 | restarts: Vec, 14 | 15 | last_key: Vec, 16 | restart_counter: usize, 17 | counter: usize, 18 | } 19 | 20 | impl BlockBuilder { 21 | pub fn new(o: Options) -> BlockBuilder { 22 | let mut restarts = vec![0]; 23 | restarts.reserve(1023); 24 | 25 | BlockBuilder { 26 | buffer: Vec::with_capacity(o.block_size), 27 | opt: o, 28 | restarts, 29 | last_key: Vec::new(), 30 | restart_counter: 0, 31 | counter: 0, 32 | } 33 | } 34 | 35 | pub fn entries(&self) -> usize { 36 | self.counter 37 | } 38 | 39 | pub fn last_key(&self) -> &[u8] { 40 | &self.last_key 41 | } 42 | 43 | pub fn size_estimate(&self) -> usize { 44 | self.buffer.len() + 4 * self.restarts.len() + 4 45 | } 46 | 47 | pub fn reset(&mut self) { 48 | self.buffer.clear(); 49 | self.restarts.clear(); 50 | self.last_key.clear(); 51 | self.restart_counter = 0; 52 | self.counter = 0; 53 | } 54 | 55 | pub fn add(&mut self, key: &[u8], val: &[u8]) { 56 | assert!(self.restart_counter <= self.opt.block_restart_interval); 57 | assert!( 58 | self.buffer.is_empty() 59 | || self.opt.cmp.cmp(self.last_key.as_slice(), key) == Ordering::Less 60 | ); 61 | 62 | let mut shared = 0; 63 | 64 | if self.restart_counter < self.opt.block_restart_interval { 65 | let smallest = if self.last_key.len() < key.len() { 66 | self.last_key.len() 67 | } else { 68 | key.len() 69 | }; 70 | 71 | while shared < smallest && self.last_key[shared] == key[shared] { 72 | shared += 1; 73 | } 74 | } else { 75 | self.restarts.push(self.buffer.len() as u32); 76 | self.last_key.clear(); 77 | self.restart_counter = 0; 78 | } 79 | 80 | let non_shared = key.len() - shared; 81 | 82 | self.buffer 83 | .write_varint(shared) 84 | .expect("write to buffer failed"); 85 | self.buffer 86 | .write_varint(non_shared) 87 | .expect("write to buffer failed"); 88 | self.buffer 89 | .write_varint(val.len()) 90 | .expect("write to buffer failed"); 91 | 
self.buffer.extend_from_slice(&key[shared..]); 92 | self.buffer.extend_from_slice(val); 93 | 94 | // Update key 95 | self.last_key.resize(shared, 0); 96 | self.last_key.extend_from_slice(&key[shared..]); 97 | 98 | self.restart_counter += 1; 99 | self.counter += 1; 100 | } 101 | 102 | pub fn finish(mut self) -> BlockContents { 103 | self.buffer.reserve(self.restarts.len() * 4 + 4); 104 | 105 | // 1. Append RESTARTS 106 | for r in self.restarts.iter() { 107 | self.buffer 108 | .write_fixedint(*r) 109 | .expect("write to buffer failed"); 110 | } 111 | 112 | // 2. Append N_RESTARTS 113 | self.buffer 114 | .write_fixedint(self.restarts.len() as u32) 115 | .expect("write to buffer failed"); 116 | 117 | // done 118 | self.buffer 119 | } 120 | } 121 | 122 | #[cfg(test)] 123 | mod tests { 124 | use super::*; 125 | use crate::options; 126 | 127 | fn get_data() -> Vec<(&'static [u8], &'static [u8])> { 128 | vec![ 129 | ("key1".as_bytes(), "value1".as_bytes()), 130 | ( 131 | "loooooooooooooooooooooooooooooooooongerkey1".as_bytes(), 132 | "shrtvl1".as_bytes(), 133 | ), 134 | ("medium length key 1".as_bytes(), "some value 2".as_bytes()), 135 | ("prefix_key1".as_bytes(), "value".as_bytes()), 136 | ("prefix_key2".as_bytes(), "value".as_bytes()), 137 | ("prefix_key3".as_bytes(), "value".as_bytes()), 138 | ] 139 | } 140 | 141 | #[test] 142 | fn test_block_builder_sanity() { 143 | let mut o = options::for_test(); 144 | o.block_restart_interval = 3; 145 | let mut builder = BlockBuilder::new(o); 146 | let d = get_data(); 147 | 148 | for &(k, v) in d.iter() { 149 | builder.add(k, v); 150 | assert!(builder.restart_counter <= 3); 151 | assert_eq!(builder.last_key(), k); 152 | } 153 | 154 | assert_eq!(149, builder.size_estimate()); 155 | assert_eq!(d.len(), builder.entries()); 156 | 157 | let block = builder.finish(); 158 | assert_eq!(block.len(), 149); 159 | } 160 | 161 | #[test] 162 | fn test_block_builder_reset() { 163 | let mut o = options::for_test(); 164 | o.block_restart_interval = 
3; 165 | let mut builder = BlockBuilder::new(o); 166 | let d = get_data(); 167 | 168 | for &(k, v) in d.iter() { 169 | builder.add(k, v); 170 | assert!(builder.restart_counter <= 3); 171 | assert_eq!(builder.last_key(), k); 172 | } 173 | 174 | assert_eq!(d.len(), builder.entries()); 175 | builder.reset(); 176 | assert_eq!(0, builder.entries()); 177 | assert_eq!(4, builder.size_estimate()); 178 | } 179 | 180 | #[test] 181 | #[should_panic] 182 | fn test_block_builder_panics() { 183 | let mut d = get_data(); 184 | // Identical key as d[3]. 185 | d[4].0 = b"prefix_key1"; 186 | 187 | let mut builder = BlockBuilder::new(options::for_test()); 188 | for &(k, v) in d.iter() { 189 | builder.add(k, v); 190 | assert_eq!(k, builder.last_key()); 191 | } 192 | } 193 | // Additional test coverage is provided by tests in block.rs. 194 | } 195 | -------------------------------------------------------------------------------- /src/blockhandle.rs: -------------------------------------------------------------------------------- 1 | use integer_encoding::VarInt; 2 | 3 | /// Contains an offset and a length (or size); can be efficiently encoded in to varints. This is 4 | /// used typically as file-internal pointer in table (SSTable) files. For example, the index block 5 | /// in an SSTable is a block of (key = largest key in block) -> (value = encoded blockhandle of 6 | /// block). 7 | #[derive(Debug, Clone)] 8 | pub struct BlockHandle { 9 | offset: usize, 10 | size: usize, 11 | } 12 | 13 | impl BlockHandle { 14 | /// Decodes a block handle from `from` and returns a block handle 15 | /// together with how many bytes were read from the slice. 
16 | pub fn decode(from: &[u8]) -> Option<(BlockHandle, usize)> { 17 | let (off, offsize) = usize::decode_var(from)?; 18 | let (sz, szsize) = usize::decode_var(&from[offsize..])?; 19 | 20 | Some(( 21 | BlockHandle { 22 | offset: off, 23 | size: sz, 24 | }, 25 | offsize + szsize, 26 | )) 27 | } 28 | 29 | pub fn new(offset: usize, size: usize) -> BlockHandle { 30 | BlockHandle { offset, size } 31 | } 32 | 33 | pub fn offset(&self) -> usize { 34 | self.offset 35 | } 36 | 37 | pub fn size(&self) -> usize { 38 | self.size 39 | } 40 | 41 | /// Returns how many bytes were written, or 0 if the write failed because `dst` is too small. 42 | pub fn encode_to(&self, dst: &mut [u8]) -> usize { 43 | assert!(dst.len() >= self.offset.required_space() + self.size.required_space()); 44 | 45 | let off = self.offset.encode_var(dst); 46 | off + self.size.encode_var(&mut dst[off..]) 47 | } 48 | } 49 | 50 | #[cfg(test)] 51 | mod tests { 52 | use super::*; 53 | 54 | #[test] 55 | fn test_blockhandle() { 56 | let bh = BlockHandle::new(890, 777); 57 | let mut dst = [0_u8; 128]; 58 | let enc_sz = bh.encode_to(&mut dst[..]); 59 | 60 | let (bh2, dec_sz) = BlockHandle::decode(&dst).unwrap(); 61 | 62 | assert_eq!(enc_sz, dec_sz); 63 | assert_eq!(bh.size(), bh2.size()); 64 | assert_eq!(bh.offset(), bh2.offset()); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/cache.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::mem::swap; 3 | 4 | // No clone, no copy! That asserts that an LRUHandle exists only once. 
// No clone, no copy! That asserts that an LRUHandle exists only once.
struct LRUHandle<T>(*mut LRUNode<T>);

// Doubly-linked list node. Forward links (`next`) are owning `Box`es; back
// links (`prev`) are raw pointers into the same list.
struct LRUNode<T> {
    next: Option<Box<LRUNode<T>>>, // None in the list's last node
    prev: Option<*mut LRUNode<T>>,
    data: Option<T>, // if None, then we have reached the head node
}

// Intrusive LRU list: `head` is a sentinel node (data == None); `head.next` is
// the front (most recently used), `head.prev` points at the last (least
// recently used) node.
struct LRUList<T> {
    head: LRUNode<T>,
    count: usize,
}

/// This is likely unstable; more investigation is needed into correct behavior!
impl<T> LRUList<T> {
    fn new() -> LRUList<T> {
        LRUList {
            head: LRUNode {
                data: None,
                next: None,
                prev: None,
            },
            count: 0,
        }
    }

    /// Inserts new element at front (least recently used element)
    // NOTE(review): the doc line above says "least recently used", but the
    // front (head.next) is treated as most-recently-used by reinsert_front /
    // remove_last — presumably "most recently used" is meant; confirm.
    fn insert(&mut self, elem: T) -> LRUHandle<T> {
        self.count += 1;
        // Not first element
        if self.head.next.is_some() {
            let mut new = Box::new(LRUNode {
                data: Some(elem),
                next: None,
                prev: Some(&mut self.head as *mut LRUNode<T>),
            });
            let newp = new.as_mut() as *mut LRUNode<T>;

            // Set up the node after the new one
            self.head.next.as_mut().unwrap().prev = Some(newp);
            // Replace head.next with None and set the new node's next to that
            new.next = self.head.next.take();
            self.head.next = Some(new);

            LRUHandle(newp)
        } else {
            // First node; the only node right now is an empty head node
            let mut new = Box::new(LRUNode {
                data: Some(elem),
                next: None,
                prev: Some(&mut self.head as *mut LRUNode<T>),
            });
            let newp = new.as_mut() as *mut LRUNode<T>;

            // Set tail
            self.head.prev = Some(newp);
            // Set first node
            self.head.next = Some(new);

            LRUHandle(newp)
        }
    }

    // Remove and return the least recently used element (the tail).
    fn remove_last(&mut self) -> Option<T> {
        if self.count() == 0 {
            return None;
        }
        // head.prev points at the tail; the tail's predecessor owns the tail
        // via its `next` Box, so take() detaches the tail from the list.
        let mut lasto = unsafe { (*((*self.head.prev.unwrap()).prev.unwrap())).next.take() };

        assert!(lasto.is_some());
        if let Some(ref mut last) = lasto {
            assert!(last.prev.is_some());
            assert!(self.head.prev.is_some());
            self.head.prev = last.prev;
            self.count -= 1;
            last.data.take()
        } else {
            None
        }
    }

    // Remove the node referenced by `node_handle` and return its payload.
    // Consumes the handle, so it cannot be used again (no double-free).
    fn remove(&mut self, node_handle: LRUHandle<T>) -> T {
        unsafe {
            let d = (*node_handle.0).data.take().unwrap();
            // Take ownership of node to be removed.
            let mut current = (*(*node_handle.0).prev.unwrap()).next.take().unwrap();
            let prev = current.prev.unwrap();
            // Update previous node's successor.
            if current.next.is_some() {
                // Update next node's predecessor.
                current.next.as_mut().unwrap().prev = current.prev.take();
            }
            (*prev).next = current.next.take();

            self.count -= 1;

            d
        }
    }

    /// Reinserts the referenced node at the front.
    fn reinsert_front(&mut self, node_handle: &LRUHandle<T>) {
        unsafe {
            let prevp = (*node_handle.0).prev.unwrap();

            // If not last node, update following node's prev
            if let Some(next) = (*node_handle.0).next.as_mut() {
                next.prev = Some(prevp);
            } else {
                // If last node, update head
                self.head.prev = Some(prevp);
            }

            // Swap this.next with prev.next. After that, this.next refers to this (!)
            swap(&mut (*prevp).next, &mut (*node_handle.0).next);
            // To reinsert at head, swap head's next with this.next
            swap(&mut (*node_handle.0).next, &mut self.head.next);
            // Update this' prev reference to point to head.

            // Update the second node's prev reference.
            if let Some(ref mut newnext) = (*node_handle.0).next {
                (*node_handle.0).prev = newnext.prev;
                newnext.prev = Some(node_handle.0);
            } else {
                // Only one node, being the last one; avoid head.prev pointing to head
                self.head.prev = Some(node_handle.0);
            }

            assert!(self.head.next.is_some());
            assert!(self.head.prev.is_some());
        }
    }

    // Number of elements currently in the list.
    fn count(&self) -> usize {
        self.count
    }

    // Test helper: peek at the front element without modifying the list.
    fn _testing_head_ref(&self) -> Option<&T> {
        if let Some(ref first) = self.head.next {
            first.data.as_ref()
        } else {
            None
        }
    }
}

pub type CacheKey = [u8; 16];
pub type CacheID = u64;
// Value stored in the map: the payload plus the handle of its LRU-list node.
type CacheEntry<T> = (T, LRUHandle<CacheKey>);

/// Implementation of `ShardedLRUCache`.
/// Based on a HashMap; the elements are linked in order to support the LRU ordering.
pub struct Cache<T> {
    // note: CacheKeys (Vec) are duplicated between list and map. If this turns out to be a
    // performance bottleneck, another layer of indirection™ can solve this by mapping the key
    // to a numeric handle that keys both list and map.
    list: LRUList<CacheKey>,
    map: HashMap<CacheKey, CacheEntry<T>>,
    cap: usize,
    id: u64,
}

impl<T> Cache<T> {
    // Create a cache holding at most `capacity` elements. Panics if capacity is 0.
    pub fn new(capacity: usize) -> Cache<T> {
        assert!(capacity > 0);
        Cache {
            list: LRUList::new(),
            map: HashMap::with_capacity(1024),
            cap: capacity,
            id: 0,
        }
    }

    /// Returns an ID that is unique for this cache and that can be used to partition the cache
    /// among several users.
    pub fn new_cache_id(&mut self) -> CacheID {
        self.id += 1;
        self.id
    }

    /// How many the cache currently contains
    pub fn count(&self) -> usize {
        self.list.count()
    }

    /// The capacity of this cache
    pub fn cap(&self) -> usize {
        self.cap
    }

    /// Insert a new element into the cache. The returned `CacheHandle` can be used for further
    /// operations on that element.
    /// If the capacity has been reached, the least recently used element is removed from the
    /// cache.
    pub fn insert(&mut self, key: &CacheKey, elem: T) {
        if self.list.count() >= self.cap {
            // Evict the least recently used entry to make room.
            if let Some(removed_key) = self.list.remove_last() {
                assert!(self.map.remove(&removed_key).is_some());
            } else {
                panic!("could not remove_last(); bug!");
            }
        }

        let lru_handle = self.list.insert(*key);
        self.map.insert(*key, (elem, lru_handle));
    }

    /// Retrieve an element from the cache.
    /// If the element has been preempted from the cache in the meantime, this returns None.
    pub fn get<'a>(&'a mut self, key: &CacheKey) -> Option<&'a T> {
        match self.map.get(key) {
            None => None,
            Some((elem, lru_handle)) => {
                // A hit makes the entry most-recently-used.
                self.list.reinsert_front(&lru_handle);
                Some(elem)
            }
        }
    }

    /// Remove an element from the cache (for invalidation).
    pub fn remove(&mut self, key: &CacheKey) -> Option<T> {
        match self.map.remove(key) {
            None => None,
            Some((elem, lru_handle)) => {
                self.list.remove(lru_handle);
                Some(elem)
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::LRUList;
    use super::*;

    // Build a CacheKey with the first three bytes set and the rest zero.
    fn make_key(a: u8, b: u8, c: u8) -> CacheKey {
        [a, b, c, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    }

    #[test]
    fn test_blockcache_cache_add_rm() {
        let mut cache = Cache::new(128);

        let h_123 = make_key(1, 2, 3);
        let h_521 = make_key(1, 2, 4);
        let h_372 = make_key(3, 4, 5);
        let h_332 = make_key(6, 3, 1);
        let h_899 = make_key(8, 2, 1);

        cache.insert(&h_123, 123);
        cache.insert(&h_332, 332);
        cache.insert(&h_521, 521);
        cache.insert(&h_372, 372);
        cache.insert(&h_899, 899);

        assert_eq!(cache.count(), 5);

        assert_eq!(cache.get(&h_123), Some(&123));
        assert_eq!(cache.get(&h_372), Some(&372));

        assert_eq!(cache.remove(&h_521), Some(521));
        assert_eq!(cache.get(&h_521), None);
        assert_eq!(cache.remove(&h_521), None);

        assert_eq!(cache.count(), 4);
    }

    #[test]
    fn test_blockcache_cache_capacity() {
        let mut cache = Cache::new(3);

        let h_123 = make_key(1, 2, 3);
        let h_521 = make_key(1, 2, 4);
        let h_372 = make_key(3, 4, 5);
        let h_332 = make_key(6, 3, 1);
        let h_899 = make_key(8, 2, 1);

        cache.insert(&h_123, 123);
        cache.insert(&h_332, 332);
        cache.insert(&h_521, 521);
        cache.insert(&h_372, 372);
        cache.insert(&h_899, 899);

        assert_eq!(cache.count(), 3);

        // The two oldest entries were evicted when capacity was exceeded.
        assert_eq!(cache.get(&h_123), None);
        assert_eq!(cache.get(&h_332), None);
        assert_eq!(cache.get(&h_521), Some(&521));
        assert_eq!(cache.get(&h_372), Some(&372));
        assert_eq!(cache.get(&h_899), Some(&899));
    }

    #[test]
    fn test_blockcache_lru_remove() {
        let mut lru = LRUList::<usize>::new();

        let h_56 = lru.insert(56);
        lru.insert(22);
        lru.insert(223);
        let h_244 = lru.insert(244);
        lru.insert(1111);
        let h_12 = lru.insert(12);

        assert_eq!(lru.count(), 6);
        assert_eq!(244, lru.remove(h_244));
        assert_eq!(lru.count(), 5);
        assert_eq!(12, lru.remove(h_12));
        assert_eq!(lru.count(), 4);
        assert_eq!(56, lru.remove(h_56));
        assert_eq!(lru.count(), 3);
    }

    #[test]
    fn test_blockcache_lru_1() {
        let mut lru = LRUList::<usize>::new();

        lru.insert(56);
        lru.insert(22);
        lru.insert(244);
        lru.insert(12);

        assert_eq!(lru.count(), 4);

        // remove_last yields elements in insertion order (oldest first).
        assert_eq!(Some(56), lru.remove_last());
        assert_eq!(Some(22), lru.remove_last());
        assert_eq!(Some(244), lru.remove_last());

        assert_eq!(lru.count(), 1);

        assert_eq!(Some(12), lru.remove_last());

        assert_eq!(lru.count(), 0);

        assert_eq!(None, lru.remove_last());
    }

    #[test]
    fn test_blockcache_lru_reinsert() {
        let mut lru = LRUList::<usize>::new();

        let handle1 = lru.insert(56);
        let handle2 = lru.insert(22);
        let handle3 = lru.insert(244);

        assert_eq!(lru._testing_head_ref().copied().unwrap(), 244);

        lru.reinsert_front(&handle1);

        assert_eq!(lru._testing_head_ref().copied().unwrap(), 56);

        lru.reinsert_front(&handle3);

        assert_eq!(lru._testing_head_ref().copied().unwrap(), 244);

        lru.reinsert_front(&handle2);

        assert_eq!(lru._testing_head_ref().copied().unwrap(), 22);

        assert_eq!(lru.remove_last(), Some(56));
        assert_eq!(lru.remove_last(), Some(244));
        assert_eq!(lru.remove_last(), Some(22));
    }

    #[test]
    fn test_blockcache_lru_reinsert_2() {
        let mut lru = LRUList::<usize>::new();

        let handles = [
            lru.insert(0),
            lru.insert(1),
            lru.insert(2),
            lru.insert(3),
            lru.insert(4),
            lru.insert(5),
            lru.insert(6),
            lru.insert(7),
            lru.insert(8),
        ];

        (0..9).for_each(|i| {
            lru.reinsert_front(&handles[i]);
            assert_eq!(lru._testing_head_ref().copied(), Some(i));
        });
    }

    #[test]
    fn test_blockcache_lru_edge_cases() {
        let mut lru = LRUList::<usize>::new();

        let handle = lru.insert(3);

        // Reinsert and removal must be safe on a single-element list.
        lru.reinsert_front(&handle);
        assert_eq!(lru._testing_head_ref().copied(), Some(3));
        assert_eq!(lru.remove_last(), Some(3));
        assert_eq!(lru.remove_last(), None);
        assert_eq!(lru.remove_last(), None);
    }
}
27 | #[derive(Clone)] 28 | pub struct DefaultCmp; 29 | 30 | impl Cmp for DefaultCmp { 31 | fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering { 32 | a.cmp(b) 33 | } 34 | 35 | fn id(&self) -> &'static str { 36 | "leveldb.BytewiseComparator" 37 | } 38 | 39 | fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec { 40 | if a == b { 41 | return a.to_vec(); 42 | } 43 | 44 | let min = if a.len() < b.len() { a.len() } else { b.len() }; 45 | let mut diff_at = 0; 46 | 47 | while diff_at < min && a[diff_at] == b[diff_at] { 48 | diff_at += 1; 49 | } 50 | 51 | // First, try to find a short separator. If that fails, try a backup mechanism below. 52 | while diff_at < min { 53 | let diff = a[diff_at]; 54 | if diff < 0xff && diff + 1 < b[diff_at] { 55 | let mut sep = Vec::from(&a[0..diff_at + 1]); 56 | sep[diff_at] += 1; 57 | assert!(self.cmp(&sep, b) == Ordering::Less); 58 | return sep; 59 | } 60 | 61 | diff_at += 1; 62 | } 63 | 64 | let mut sep = Vec::with_capacity(a.len() + 1); 65 | sep.extend_from_slice(a); 66 | // Try increasing a and check if it's still smaller than b. First find the last byte 67 | // smaller than 0xff, and then increment that byte. Only if the separator is lesser than b, 68 | // return it. 69 | let mut i = a.len() - 1; 70 | while i > 0 && sep[i] == 0xff { 71 | i -= 1; 72 | } 73 | if sep[i] < 0xff { 74 | sep[i] += 1; 75 | if self.cmp(&sep, b) == Ordering::Less { 76 | return sep; 77 | } else { 78 | sep[i] -= 1; 79 | } 80 | } 81 | 82 | // Backup case: either `a` is full of 0xff, or all different places are less than 2 83 | // characters apart. 84 | // The result is not necessarily short, but a good separator: e.g., "abc" vs "abd" -> 85 | // "abc\0", which is greater than abc and lesser than abd. 86 | // Append a 0 byte; by making it longer than a, it will compare greater to it. 
87 | sep.extend_from_slice(&[0]); 88 | sep 89 | } 90 | 91 | fn find_short_succ(&self, a: &[u8]) -> Vec { 92 | let mut result = a.to_vec(); 93 | for i in 0..a.len() { 94 | if a[i] != 0xff { 95 | result[i] += 1; 96 | result.resize(i + 1, 0); 97 | return result; 98 | } 99 | } 100 | // Rare path 101 | result.push(255); 102 | result 103 | } 104 | } 105 | 106 | /// Same as memtable_key_cmp, but for InternalKeys. 107 | #[derive(Clone)] 108 | pub struct InternalKeyCmp(pub Rc>); 109 | 110 | impl Cmp for InternalKeyCmp { 111 | fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering { 112 | key_types::cmp_internal_key(self.0.as_ref().as_ref(), a, b) 113 | } 114 | 115 | fn id(&self) -> &'static str { 116 | self.0.id() 117 | } 118 | 119 | fn find_shortest_sep(&self, a: &[u8], b: &[u8]) -> Vec { 120 | if a == b { 121 | return a.to_vec(); 122 | } 123 | 124 | let (_, seqa, keya) = key_types::parse_internal_key(a); 125 | let (_, _, keyb) = key_types::parse_internal_key(b); 126 | 127 | let sep: Vec = self.0.find_shortest_sep(keya, keyb); 128 | 129 | if sep.len() < keya.len() && self.0.cmp(keya, &sep) == Ordering::Less { 130 | return LookupKey::new(&sep, types::MAX_SEQUENCE_NUMBER) 131 | .internal_key() 132 | .to_vec(); 133 | } 134 | LookupKey::new(&sep, seqa).internal_key().to_vec() 135 | } 136 | 137 | fn find_short_succ(&self, a: &[u8]) -> Vec { 138 | let (_, seq, key) = key_types::parse_internal_key(a); 139 | let succ: Vec = self.0.find_short_succ(key); 140 | LookupKey::new(&succ, seq).internal_key().to_vec() 141 | } 142 | } 143 | 144 | impl InternalKeyCmp { 145 | /// cmp_inner compares a and b using the underlying comparator (the "user comparator"). 146 | pub fn cmp_inner(&self, a: &[u8], b: &[u8]) -> Ordering { 147 | self.0.cmp(a, b) 148 | } 149 | } 150 | 151 | /// An internal comparator wrapping a user-supplied comparator. This comparator is used to compare 152 | /// memtable keys, which contain length prefixes and a sequence number. 
153 | /// The ordering is determined by asking the wrapped comparator; ties are broken by *reverse* 154 | /// ordering the sequence numbers. (This means that when having an entry abx/4 and seRching for 155 | /// abx/5, then abx/4 is counted as "greater-or-equal", making snapshot functionality work at all) 156 | #[derive(Clone)] 157 | pub struct MemtableKeyCmp(pub Rc>); 158 | 159 | impl Cmp for MemtableKeyCmp { 160 | fn cmp(&self, a: &[u8], b: &[u8]) -> Ordering { 161 | key_types::cmp_memtable_key(self.0.as_ref().as_ref(), a, b) 162 | } 163 | 164 | fn id(&self) -> &'static str { 165 | self.0.id() 166 | } 167 | 168 | // The following two impls should not be used (by principle) although they should be correct. 169 | // They will crash the program. 170 | fn find_shortest_sep(&self, _: &[u8], _: &[u8]) -> Vec { 171 | panic!("find* functions are invalid on MemtableKeyCmp"); 172 | } 173 | 174 | fn find_short_succ(&self, _: &[u8]) -> Vec { 175 | panic!("find* functions are invalid on MemtableKeyCmp"); 176 | } 177 | } 178 | 179 | #[cfg(test)] 180 | mod tests { 181 | use super::*; 182 | use key_types::LookupKey; 183 | 184 | #[test] 185 | fn test_cmp_defaultcmp_shortest_sep() { 186 | assert_eq!( 187 | DefaultCmp.find_shortest_sep("abcd".as_bytes(), "abcf".as_bytes()), 188 | "abce".as_bytes() 189 | ); 190 | assert_eq!( 191 | DefaultCmp.find_shortest_sep("abc".as_bytes(), "acd".as_bytes()), 192 | "abd".as_bytes() 193 | ); 194 | assert_eq!( 195 | DefaultCmp.find_shortest_sep("abcdefghi".as_bytes(), "abcffghi".as_bytes()), 196 | "abce".as_bytes() 197 | ); 198 | assert_eq!( 199 | DefaultCmp.find_shortest_sep("a".as_bytes(), "a".as_bytes()), 200 | "a".as_bytes() 201 | ); 202 | assert_eq!( 203 | DefaultCmp.find_shortest_sep("a".as_bytes(), "b".as_bytes()), 204 | "a\0".as_bytes() 205 | ); 206 | assert_eq!( 207 | DefaultCmp.find_shortest_sep("abc".as_bytes(), "zzz".as_bytes()), 208 | "b".as_bytes() 209 | ); 210 | assert_eq!( 211 | DefaultCmp.find_shortest_sep("yyy".as_bytes(), 
"z".as_bytes()), 212 | "yyz".as_bytes() 213 | ); 214 | assert_eq!( 215 | DefaultCmp.find_shortest_sep("".as_bytes(), "".as_bytes()), 216 | "".as_bytes() 217 | ); 218 | } 219 | 220 | #[test] 221 | fn test_cmp_defaultcmp_short_succ() { 222 | assert_eq!( 223 | DefaultCmp.find_short_succ("abcd".as_bytes()), 224 | "b".as_bytes() 225 | ); 226 | assert_eq!( 227 | DefaultCmp.find_short_succ("zzzz".as_bytes()), 228 | "{".as_bytes() 229 | ); 230 | assert_eq!(DefaultCmp.find_short_succ(&[]), &[0xff]); 231 | assert_eq!( 232 | DefaultCmp.find_short_succ(&[0xff, 0xff, 0xff]), 233 | &[0xff, 0xff, 0xff, 0xff] 234 | ); 235 | } 236 | 237 | #[test] 238 | fn test_cmp_internalkeycmp_shortest_sep() { 239 | let cmp = InternalKeyCmp(Rc::new(Box::new(DefaultCmp))); 240 | assert_eq!( 241 | cmp.find_shortest_sep( 242 | LookupKey::new("abcd".as_bytes(), 1).internal_key(), 243 | LookupKey::new("abcf".as_bytes(), 2).internal_key() 244 | ), 245 | LookupKey::new("abce".as_bytes(), 1).internal_key() 246 | ); 247 | assert_eq!( 248 | cmp.find_shortest_sep( 249 | LookupKey::new("abcd".as_bytes(), 1).internal_key(), 250 | LookupKey::new("abce".as_bytes(), 2).internal_key() 251 | ), 252 | LookupKey::new("abcd\0".as_bytes(), 1).internal_key() 253 | ); 254 | assert_eq!( 255 | cmp.find_shortest_sep( 256 | LookupKey::new("abc".as_bytes(), 1).internal_key(), 257 | LookupKey::new("zzz".as_bytes(), 2).internal_key() 258 | ), 259 | LookupKey::new("b".as_bytes(), types::MAX_SEQUENCE_NUMBER).internal_key() 260 | ); 261 | assert_eq!( 262 | cmp.find_shortest_sep( 263 | LookupKey::new("abc".as_bytes(), 1).internal_key(), 264 | LookupKey::new("acd".as_bytes(), 2).internal_key() 265 | ), 266 | LookupKey::new("abd".as_bytes(), 1).internal_key() 267 | ); 268 | assert_eq!( 269 | cmp.find_shortest_sep( 270 | LookupKey::new("abc".as_bytes(), 1).internal_key(), 271 | LookupKey::new("abe".as_bytes(), 2).internal_key() 272 | ), 273 | LookupKey::new("abd".as_bytes(), 1).internal_key() 274 | ); 275 | assert_eq!( 276 | 
cmp.find_shortest_sep( 277 | LookupKey::new("".as_bytes(), 1).internal_key(), 278 | LookupKey::new("".as_bytes(), 2).internal_key() 279 | ), 280 | LookupKey::new("".as_bytes(), 1).internal_key() 281 | ); 282 | assert_eq!( 283 | cmp.find_shortest_sep( 284 | LookupKey::new("abc".as_bytes(), 2).internal_key(), 285 | LookupKey::new("abc".as_bytes(), 2).internal_key() 286 | ), 287 | LookupKey::new("abc".as_bytes(), 2).internal_key() 288 | ); 289 | } 290 | 291 | #[test] 292 | fn test_cmp_internalkeycmp() { 293 | let cmp = InternalKeyCmp(Rc::new(Box::new(DefaultCmp))); 294 | // a < b < c 295 | let a = LookupKey::new("abc".as_bytes(), 2).internal_key().to_vec(); 296 | let b = LookupKey::new("abc".as_bytes(), 1).internal_key().to_vec(); 297 | let c = LookupKey::new("abd".as_bytes(), 3).internal_key().to_vec(); 298 | let d = "xyy".as_bytes(); 299 | let e = "xyz".as_bytes(); 300 | 301 | assert_eq!(Ordering::Less, cmp.cmp(&a, &b)); 302 | assert_eq!(Ordering::Equal, cmp.cmp(&a, &a)); 303 | assert_eq!(Ordering::Greater, cmp.cmp(&b, &a)); 304 | assert_eq!(Ordering::Less, cmp.cmp(&a, &c)); 305 | assert_eq!(Ordering::Less, cmp.cmp_inner(d, e)); 306 | assert_eq!(Ordering::Greater, cmp.cmp_inner(e, d)); 307 | } 308 | 309 | #[test] 310 | #[should_panic] 311 | fn test_cmp_memtablekeycmp_panics() { 312 | let cmp = MemtableKeyCmp(Rc::new(Box::new(DefaultCmp))); 313 | cmp.cmp(&[1, 2, 3], &[4, 5, 6]); 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /src/compressor.rs: -------------------------------------------------------------------------------- 1 | /// Custom compression method 2 | /// 3 | /// ``` 4 | /// # use rusty_leveldb::{Compressor, CompressorId}; 5 | /// 6 | /// #[derive(Debug, Clone, Copy, Default)] 7 | /// pub struct CustomCompressor; 8 | /// 9 | /// impl CompressorId for CustomCompressor { 10 | /// // a unique id to identify what compressor should DB use 11 | /// const ID: u8 = 42; 12 | /// } 13 | /// 14 | /// impl 
Compressor for CustomCompressor { 15 | /// fn encode(&self, block: Vec) -> rusty_leveldb::Result> { 16 | /// // Do something 17 | /// Ok(block) 18 | /// } 19 | /// 20 | /// fn decode(&self, block: Vec) -> rusty_leveldb::Result> { 21 | /// // Do something 22 | /// Ok(block) 23 | /// } 24 | /// } 25 | /// ``` 26 | /// 27 | /// See [crate::CompressorList] for usage 28 | pub trait Compressor { 29 | fn encode(&self, block: Vec) -> crate::Result>; 30 | 31 | fn decode(&self, block: Vec) -> crate::Result>; 32 | } 33 | 34 | /// Set default compressor id 35 | pub trait CompressorId { 36 | const ID: u8; 37 | } 38 | 39 | /// A compressor that do **Nothing** 40 | /// 41 | /// It default id is `0` 42 | #[derive(Debug, Clone, Copy, Default)] 43 | pub struct NoneCompressor; 44 | 45 | impl CompressorId for NoneCompressor { 46 | const ID: u8 = 0; 47 | } 48 | 49 | impl Compressor for NoneCompressor { 50 | fn encode(&self, block: Vec) -> crate::Result> { 51 | Ok(block) 52 | } 53 | 54 | fn decode(&self, block: Vec) -> crate::Result> { 55 | Ok(block) 56 | } 57 | } 58 | 59 | /// A compressor that compress data with Google's Snappy 60 | /// 61 | /// It default id is `1` 62 | #[derive(Debug, Clone, Copy, Default)] 63 | pub struct SnappyCompressor; 64 | 65 | impl CompressorId for SnappyCompressor { 66 | const ID: u8 = 1; 67 | } 68 | 69 | impl Compressor for SnappyCompressor { 70 | fn encode(&self, block: Vec) -> crate::Result> { 71 | Ok(snap::raw::Encoder::new().compress_vec(&block)?) 72 | } 73 | 74 | fn decode(&self, block: Vec) -> crate::Result> { 75 | Ok(snap::raw::Decoder::new().decompress_vec(&block)?) 
76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/crc.rs: -------------------------------------------------------------------------------- 1 | pub(crate) fn crc32(data: impl AsRef<[u8]>) -> u32 { 2 | crc32c::crc32c(data.as_ref()) 3 | } 4 | 5 | pub(crate) struct Digest { 6 | hasher: crc32c::Crc32cHasher, 7 | } 8 | 9 | impl Digest { 10 | pub fn update(&mut self, data: &[u8]) { 11 | use std::hash::Hasher; 12 | self.hasher.write(data); 13 | } 14 | 15 | pub fn finalize(self) -> u32 { 16 | use std::hash::Hasher; 17 | self.hasher.finish() as u32 18 | } 19 | } 20 | 21 | pub(crate) fn digest() -> Digest { 22 | Digest { 23 | hasher: crc32c::Crc32cHasher::new(0), 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/disk_env.rs: -------------------------------------------------------------------------------- 1 | use crate::env::{path_to_str, Env, FileLock, Logger, RandomAccess}; 2 | use crate::env_common::{micros, sleep_for}; 3 | use crate::error::{err, Result, Status, StatusCode}; 4 | use crate::types::{share, Shared}; 5 | use fs2::FileExt; 6 | 7 | use std::collections::HashMap; 8 | use std::fs::{self, File}; 9 | use std::io::{self, ErrorKind, Read, Write}; 10 | use std::iter::FromIterator; 11 | use std::path::{Path, PathBuf}; 12 | 13 | type FileDescriptor = i32; 14 | 15 | #[derive(Clone)] 16 | pub struct PosixDiskEnv { 17 | locks: Shared>, 18 | } 19 | 20 | impl Default for PosixDiskEnv { 21 | fn default() -> Self { 22 | Self::new() 23 | } 24 | } 25 | 26 | impl PosixDiskEnv { 27 | pub fn new() -> PosixDiskEnv { 28 | PosixDiskEnv { 29 | locks: share(HashMap::new()), 30 | } 31 | } 32 | } 33 | 34 | /// map_err_with_name annotates an io::Error with information about the operation and the file. 
35 | fn map_err_with_name(method: &'static str, f: &Path, e: io::Error) -> Status { 36 | let mut s = Status::from(e); 37 | s.err = format!("{}: {}: {}", method, s.err, path_to_str(f)); 38 | s 39 | } 40 | 41 | // Note: We're using Ok(f()?) in several locations below in order to benefit from the automatic 42 | // error conversion using std::convert::From. 43 | impl Env for PosixDiskEnv { 44 | fn open_sequential_file(&self, p: &Path) -> Result> { 45 | Ok(Box::new( 46 | fs::OpenOptions::new() 47 | .read(true) 48 | .open(p) 49 | .map_err(|e| map_err_with_name("open (seq)", p, e))?, 50 | )) 51 | } 52 | fn open_random_access_file(&self, p: &Path) -> Result> { 53 | fs::OpenOptions::new() 54 | .read(true) 55 | .open(p) 56 | .map(|f| { 57 | let b: Box = Box::new(f); 58 | b 59 | }) 60 | .map_err(|e| map_err_with_name("open (randomaccess)", p, e)) 61 | } 62 | fn open_writable_file(&self, p: &Path) -> Result> { 63 | Ok(Box::new( 64 | fs::OpenOptions::new() 65 | .create(true) 66 | .truncate(true) 67 | .write(true) 68 | .append(false) 69 | .open(p) 70 | .map_err(|e| map_err_with_name("open (write)", p, e))?, 71 | )) 72 | } 73 | fn open_appendable_file(&self, p: &Path) -> Result> { 74 | Ok(Box::new( 75 | fs::OpenOptions::new() 76 | .create(true) 77 | .append(true) 78 | .open(p) 79 | .map_err(|e| map_err_with_name("open (append)", p, e))?, 80 | )) 81 | } 82 | 83 | fn exists(&self, p: &Path) -> Result { 84 | Ok(p.exists()) 85 | } 86 | fn children(&self, p: &Path) -> Result> { 87 | let dir_reader = fs::read_dir(p).map_err(|e| map_err_with_name("children", p, e))?; 88 | let filenames = dir_reader 89 | .map(|r| match r { 90 | Ok(_) => { 91 | let direntry = r.unwrap(); 92 | Path::new(&direntry.file_name()).to_owned() 93 | } 94 | Err(_) => Path::new("").to_owned(), 95 | }) 96 | .filter(|s| !s.as_os_str().is_empty()); 97 | Ok(Vec::from_iter(filenames)) 98 | } 99 | fn size_of(&self, p: &Path) -> Result { 100 | let meta = fs::metadata(p).map_err(|e| map_err_with_name("size_of", p, e))?; 101 
| Ok(meta.len() as usize) 102 | } 103 | 104 | fn delete(&self, p: &Path) -> Result<()> { 105 | fs::remove_file(p).map_err(|e| map_err_with_name("delete", p, e)) 106 | } 107 | fn mkdir(&self, p: &Path) -> Result<()> { 108 | fs::create_dir_all(p).map_err(|e| map_err_with_name("mkdir", p, e)) 109 | } 110 | fn rmdir(&self, p: &Path) -> Result<()> { 111 | fs::remove_dir_all(p).map_err(|e| map_err_with_name("rmdir", p, e)) 112 | } 113 | fn rename(&self, old: &Path, new: &Path) -> Result<()> { 114 | fs::rename(old, new).map_err(|e| map_err_with_name("rename", old, e)) 115 | } 116 | 117 | fn lock(&self, p: &Path) -> Result { 118 | let mut locks = self.locks.borrow_mut(); 119 | 120 | if let std::collections::hash_map::Entry::Vacant(e) = 121 | locks.entry(p.to_str().unwrap().to_string()) 122 | { 123 | let f = fs::OpenOptions::new() 124 | .write(true) 125 | .create(true) 126 | .truncate(true) 127 | .open(p) 128 | .map_err(|e| map_err_with_name("lock", p, e))?; 129 | 130 | match f.try_lock_exclusive() { 131 | Err(err) if err.kind() == ErrorKind::WouldBlock => { 132 | return Err(Status::new( 133 | StatusCode::LockError, 134 | "lock on database is already held by different process", 135 | )) 136 | } 137 | Err(_) => { 138 | return Err(Status::new( 139 | StatusCode::Errno(errno::errno()), 140 | &format!("unknown lock error on file {:?} (file {})", f, p.display()), 141 | )) 142 | } 143 | _ => (), 144 | }; 145 | 146 | e.insert(f); 147 | let lock = FileLock { 148 | id: p.to_str().unwrap().to_string(), 149 | }; 150 | Ok(lock) 151 | } else { 152 | Err(Status::new(StatusCode::AlreadyExists, "Lock is held")) 153 | } 154 | } 155 | fn unlock(&self, l: FileLock) -> Result<()> { 156 | let mut locks = self.locks.borrow_mut(); 157 | if !locks.contains_key(&l.id) { 158 | err( 159 | StatusCode::LockError, 160 | &format!("unlocking a file that is not locked: {}", l.id), 161 | ) 162 | } else { 163 | let f = locks.remove(&l.id).unwrap(); 164 | if f.unlock().is_err() { 165 | return 
err(StatusCode::LockError, &format!("unlock failed: {}", l.id)); 166 | } 167 | Ok(()) 168 | } 169 | } 170 | 171 | fn new_logger(&self, p: &Path) -> Result { 172 | self.open_appendable_file(p) 173 | .map(|dst| Logger::new(Box::new(dst))) 174 | } 175 | 176 | fn micros(&self) -> u64 { 177 | micros() 178 | } 179 | 180 | fn sleep_for(&self, micros: u32) { 181 | sleep_for(micros); 182 | } 183 | } 184 | 185 | #[cfg(test)] 186 | mod tests { 187 | use super::*; 188 | 189 | use std::convert::AsRef; 190 | use std::io::Write; 191 | use std::iter::FromIterator; 192 | 193 | #[test] 194 | fn test_files() { 195 | let n = "testfile.xyz".to_string(); 196 | let name = n.as_ref(); 197 | let env = PosixDiskEnv::new(); 198 | 199 | // exists, size_of, delete 200 | assert!(env.open_appendable_file(name).is_ok()); 201 | assert!(env.exists(name).unwrap_or(false)); 202 | assert_eq!(env.size_of(name).unwrap_or(1), 0); 203 | assert!(env.delete(name).is_ok()); 204 | 205 | assert!(env.open_writable_file(name).is_ok()); 206 | assert!(env.exists(name).unwrap_or(false)); 207 | assert_eq!(env.size_of(name).unwrap_or(1), 0); 208 | assert!(env.delete(name).is_ok()); 209 | 210 | { 211 | // write 212 | let mut f = env.open_writable_file(name).unwrap(); 213 | let _ = f.write("123xyz".as_bytes()); 214 | assert_eq!(6, env.size_of(name).unwrap_or(0)); 215 | 216 | // rename 217 | let newname = Path::new("testfile2.xyz"); 218 | assert!(env.rename(name, newname).is_ok()); 219 | assert_eq!(6, env.size_of(newname).unwrap()); 220 | assert!(!env.exists(name).unwrap()); 221 | // rename back so that the remaining tests can use the file. 
222 | assert!(env.rename(newname, name).is_ok()); 223 | } 224 | 225 | assert!(env.open_sequential_file(name).is_ok()); 226 | assert!(env.open_random_access_file(name).is_ok()); 227 | 228 | assert!(env.delete(name).is_ok()); 229 | } 230 | 231 | #[test] 232 | fn test_locking() { 233 | let env = PosixDiskEnv::new(); 234 | let n = "testfile.123".to_string(); 235 | let name = n.as_ref(); 236 | 237 | { 238 | let mut f = env.open_writable_file(name).unwrap(); 239 | let _ = f.write("123xyz".as_bytes()); 240 | assert_eq!(env.size_of(name).unwrap_or(0), 6); 241 | } 242 | 243 | { 244 | let r = env.lock(name); 245 | assert!(r.is_ok()); 246 | env.unlock(r.unwrap()).unwrap(); 247 | } 248 | 249 | { 250 | let r = env.lock(name); 251 | assert!(r.is_ok()); 252 | let s = env.lock(name); 253 | assert!(s.is_err()); 254 | env.unlock(r.unwrap()).unwrap(); 255 | } 256 | 257 | assert!(env.delete(name).is_ok()); 258 | } 259 | 260 | #[test] 261 | fn test_dirs() { 262 | let d = "subdir/"; 263 | let dirname = d.as_ref(); 264 | let env = PosixDiskEnv::new(); 265 | 266 | assert!(env.mkdir(dirname).is_ok()); 267 | assert!(env 268 | .open_writable_file( 269 | String::from_iter(vec![d.to_string(), "f1.txt".to_string()].into_iter()).as_ref() 270 | ) 271 | .is_ok()); 272 | assert_eq!(env.children(dirname).unwrap().len(), 1); 273 | assert!(env.rmdir(dirname).is_ok()); 274 | } 275 | } 276 | -------------------------------------------------------------------------------- /src/env.rs: -------------------------------------------------------------------------------- 1 | //! An `env` is an abstraction layer that allows the database to run both on different platforms as 2 | //! well as persisting data on disk or in memory. 
3 | 4 | use crate::error::Result; 5 | 6 | use std::fs::File; 7 | use std::io::prelude::*; 8 | #[cfg(unix)] 9 | use std::os::unix::fs::FileExt; 10 | #[cfg(windows)] 11 | use std::os::windows::fs::FileExt; 12 | use std::path::{Path, PathBuf}; 13 | 14 | pub trait RandomAccess { 15 | fn read_at(&self, off: usize, dst: &mut [u8]) -> Result; 16 | } 17 | 18 | #[cfg(unix)] 19 | impl RandomAccess for File { 20 | fn read_at(&self, off: usize, dst: &mut [u8]) -> Result { 21 | Ok((self as &dyn FileExt).read_at(dst, off as u64)?) 22 | } 23 | } 24 | 25 | #[cfg(windows)] 26 | impl RandomAccess for File { 27 | fn read_at(&self, off: usize, dst: &mut [u8]) -> Result { 28 | Ok((self as &dyn FileExt).seek_read(dst, off as u64)?) 29 | } 30 | } 31 | 32 | pub struct FileLock { 33 | pub id: String, 34 | } 35 | 36 | pub trait Env { 37 | fn open_sequential_file(&self, _: &Path) -> Result>; 38 | fn open_random_access_file(&self, _: &Path) -> Result>; 39 | fn open_writable_file(&self, _: &Path) -> Result>; 40 | fn open_appendable_file(&self, _: &Path) -> Result>; 41 | 42 | fn exists(&self, _: &Path) -> Result; 43 | fn children(&self, _: &Path) -> Result>; 44 | fn size_of(&self, _: &Path) -> Result; 45 | 46 | fn delete(&self, _: &Path) -> Result<()>; 47 | fn mkdir(&self, _: &Path) -> Result<()>; 48 | fn rmdir(&self, _: &Path) -> Result<()>; 49 | fn rename(&self, _: &Path, _: &Path) -> Result<()>; 50 | 51 | fn lock(&self, _: &Path) -> Result; 52 | fn unlock(&self, l: FileLock) -> Result<()>; 53 | 54 | fn new_logger(&self, _: &Path) -> Result; 55 | 56 | fn micros(&self) -> u64; 57 | fn sleep_for(&self, micros: u32); 58 | } 59 | 60 | pub struct Logger { 61 | dst: Box, 62 | } 63 | 64 | impl Logger { 65 | pub fn new(w: Box) -> Logger { 66 | Logger { dst: w } 67 | } 68 | 69 | pub fn log(&mut self, message: &str) { 70 | let _ = self.dst.write(message.as_bytes()); 71 | let _ = self.dst.write("\n".as_bytes()); 72 | } 73 | } 74 | 75 | pub fn path_to_string(p: &Path) -> String { 76 | 
p.to_str().map(String::from).unwrap() 77 | } 78 | 79 | pub fn path_to_str(p: &Path) -> &str { 80 | p.to_str().unwrap() 81 | } 82 | -------------------------------------------------------------------------------- /src/env_common.rs: -------------------------------------------------------------------------------- 1 | use std::thread; 2 | use std::time; 3 | 4 | pub fn micros() -> u64 { 5 | loop { 6 | let now = time::SystemTime::now().duration_since(time::UNIX_EPOCH); 7 | 8 | match now { 9 | Err(_) => continue, 10 | Ok(dur) => return dur.as_secs() * 1000000 + dur.subsec_micros() as u64, 11 | } 12 | } 13 | } 14 | 15 | pub fn sleep_for(micros: u32) { 16 | thread::sleep(time::Duration::new(0, micros * 1000)); 17 | } 18 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use std::convert::From; 2 | use std::error::Error; 3 | use std::fmt::{self, Display, Formatter}; 4 | use std::io; 5 | use std::result; 6 | use std::sync; 7 | 8 | /// StatusCode describes various failure modes of database operations. 9 | #[derive(Clone, Debug, PartialEq)] 10 | #[allow(dead_code)] 11 | pub enum StatusCode { 12 | OK, 13 | 14 | AlreadyExists, 15 | Corruption, 16 | CompressionError, 17 | IOError, 18 | InvalidArgument, 19 | InvalidData, 20 | LockError, 21 | NotFound, 22 | NotSupported, 23 | PermissionDenied, 24 | AsyncError, 25 | Unknown, 26 | #[cfg(feature = "fs")] 27 | Errno(errno::Errno), 28 | } 29 | 30 | /// Status encapsulates a `StatusCode` and an error message. It can be displayed, and also 31 | /// implements `Error`. 
32 | #[derive(Clone, Debug, PartialEq)] 33 | pub struct Status { 34 | pub code: StatusCode, 35 | pub err: String, 36 | } 37 | 38 | impl Default for Status { 39 | fn default() -> Status { 40 | Status { 41 | code: StatusCode::OK, 42 | err: String::new(), 43 | } 44 | } 45 | } 46 | 47 | impl Display for Status { 48 | fn fmt(&self, fmt: &mut Formatter) -> result::Result<(), fmt::Error> { 49 | fmt.write_str(&self.err) 50 | } 51 | } 52 | 53 | impl Error for Status { 54 | fn description(&self) -> &str { 55 | &self.err 56 | } 57 | } 58 | 59 | impl Status { 60 | pub fn new(code: StatusCode, msg: &str) -> Status { 61 | let err = match msg.is_empty() { 62 | true => format!("{:?}", code), 63 | false => format!("{:?}: {}", code, msg), 64 | }; 65 | Status { code, err } 66 | } 67 | pub fn annotate>(self, msg: S) -> Status { 68 | Status { 69 | code: self.code, 70 | err: format!("{}: {}", msg.as_ref(), self.err), 71 | } 72 | } 73 | } 74 | 75 | /// LevelDB's result type 76 | pub type Result = result::Result; 77 | 78 | /// err returns a new Status wrapped in a Result. 
79 | pub fn err(code: StatusCode, msg: &str) -> Result { 80 | Err(Status::new(code, msg)) 81 | } 82 | 83 | impl From for Status { 84 | fn from(e: io::Error) -> Status { 85 | let c = match e.kind() { 86 | io::ErrorKind::NotFound => StatusCode::NotFound, 87 | io::ErrorKind::InvalidData => StatusCode::Corruption, 88 | io::ErrorKind::InvalidInput => StatusCode::InvalidArgument, 89 | io::ErrorKind::PermissionDenied => StatusCode::PermissionDenied, 90 | _ => StatusCode::IOError, 91 | }; 92 | 93 | Status::new(c, &e.to_string()) 94 | } 95 | } 96 | 97 | impl From> for Status { 98 | fn from(_: sync::PoisonError) -> Status { 99 | Status::new(StatusCode::LockError, "lock poisoned") 100 | } 101 | } 102 | 103 | impl From for Status { 104 | fn from(e: snap::Error) -> Status { 105 | Status { 106 | code: StatusCode::CompressionError, 107 | err: e.to_string(), 108 | } 109 | } 110 | } 111 | 112 | #[cfg(test)] 113 | mod tests { 114 | use super::{Status, StatusCode}; 115 | #[test] 116 | fn test_status_to_string() { 117 | let s = Status::new(StatusCode::InvalidData, "Invalid data!"); 118 | assert_eq!("InvalidData: Invalid data!", s.to_string()); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/filter.rs: -------------------------------------------------------------------------------- 1 | use std::rc::Rc; 2 | 3 | use integer_encoding::FixedInt; 4 | 5 | /// Encapsulates a filter algorithm allowing to search for keys more efficiently. 6 | /// Usually, policies are used as a BoxedFilterPolicy (see below), so they 7 | /// can be easily cloned and nested. 8 | pub trait FilterPolicy { 9 | /// Returns a string identifying this policy. 10 | fn name(&self) -> &'static str; 11 | /// Create a filter matching the given keys. Keys are given as a long byte array that is 12 | /// indexed by the offsets contained in key_offsets. 
13 | fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec; 14 | /// Check whether the given key may match the filter. 15 | fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool; 16 | } 17 | 18 | /// A boxed and refcounted filter policy (reference-counted because a Box with unsized content 19 | /// couldn't be cloned otherwise) 20 | pub type BoxedFilterPolicy = Rc>; 21 | 22 | impl FilterPolicy for BoxedFilterPolicy { 23 | fn name(&self) -> &'static str { 24 | (**self).name() 25 | } 26 | fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec { 27 | (**self).create_filter(keys, key_offsets) 28 | } 29 | fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool { 30 | (**self).key_may_match(key, filter) 31 | } 32 | } 33 | 34 | /// Used for tables that don't have filter blocks but need a type parameter. 35 | #[derive(Clone)] 36 | pub struct NoFilterPolicy; 37 | 38 | impl NoFilterPolicy { 39 | pub fn new() -> NoFilterPolicy { 40 | NoFilterPolicy 41 | } 42 | } 43 | 44 | impl FilterPolicy for NoFilterPolicy { 45 | fn name(&self) -> &'static str { 46 | "_" 47 | } 48 | fn create_filter(&self, _: &[u8], _: &[usize]) -> Vec { 49 | vec![] 50 | } 51 | fn key_may_match(&self, _: &[u8], _: &[u8]) -> bool { 52 | true 53 | } 54 | } 55 | 56 | const BLOOM_SEED: u32 = 0xbc9f1d34; 57 | 58 | /// A filter policy using a bloom filter internally. 59 | #[derive(Clone)] 60 | pub struct BloomPolicy { 61 | bits_per_key: u32, 62 | k: u32, 63 | } 64 | 65 | /// Beware the magic numbers... 66 | impl BloomPolicy { 67 | /// Returns a new boxed BloomPolicy. 68 | pub fn new(bits_per_key: u32) -> BloomPolicy { 69 | BloomPolicy::new_unwrapped(bits_per_key) 70 | } 71 | 72 | /// Returns a new BloomPolicy with the given parameter. 
73 | fn new_unwrapped(bits_per_key: u32) -> BloomPolicy { 74 | let mut k = (bits_per_key as f32 * 0.69) as u32; 75 | 76 | if k < 1 { 77 | k = 1; 78 | } else if k > 30 { 79 | k = 30; 80 | } 81 | 82 | BloomPolicy { bits_per_key, k } 83 | } 84 | 85 | fn bloom_hash(&self, data: &[u8]) -> u32 { 86 | let m: u32 = 0xc6a4a793; 87 | let r: u32 = 24; 88 | 89 | let mut ix = 0; 90 | let limit = data.len(); 91 | 92 | let mut h: u32 = BLOOM_SEED ^ (limit as u64 * m as u64) as u32; 93 | 94 | while ix + 4 <= limit { 95 | let w = u32::decode_fixed(&data[ix..ix + 4]); 96 | ix += 4; 97 | 98 | h = (h as u64 + w as u64) as u32; 99 | h = (h as u64 * m as u64) as u32; 100 | h ^= h >> 16; 101 | } 102 | 103 | // Process left-over bytes 104 | assert!(limit - ix < 4); 105 | 106 | if limit - ix > 0 { 107 | for (i, b) in data[ix..].iter().enumerate() { 108 | h = h.overflowing_add((*b as u32) << (8 * i)).0; 109 | } 110 | 111 | h = (h as u64 * m as u64) as u32; 112 | h ^= h >> r; 113 | } 114 | h 115 | } 116 | } 117 | 118 | impl FilterPolicy for BloomPolicy { 119 | fn name(&self) -> &'static str { 120 | "leveldb.BuiltinBloomFilter2" 121 | } 122 | fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec { 123 | let filter_bits = key_offsets.len() * self.bits_per_key as usize; 124 | let mut filter: Vec; 125 | 126 | if filter_bits < 64 { 127 | filter = Vec::with_capacity(8 + 1); 128 | filter.resize(8, 0); 129 | } else { 130 | filter = Vec::with_capacity(1 + ((filter_bits + 7) / 8)); 131 | filter.resize((filter_bits + 7) / 8, 0); 132 | } 133 | 134 | let adj_filter_bits = (filter.len() * 8) as u32; 135 | 136 | // Encode k at the end of the filter. 137 | filter.push(self.k as u8); 138 | 139 | // Add all keys to the filter. 
140 | offset_data_iterate(keys, key_offsets, |key| { 141 | let mut h = self.bloom_hash(key); 142 | let delta = (h >> 17) | (h << 15); 143 | for _ in 0..self.k { 144 | let bitpos = (h % adj_filter_bits) as usize; 145 | filter[bitpos / 8] |= 1 << (bitpos % 8); 146 | h = (h as u64 + delta as u64) as u32; 147 | } 148 | }); 149 | 150 | filter 151 | } 152 | fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool { 153 | if filter.is_empty() { 154 | return true; 155 | } 156 | 157 | let bits = (filter.len() - 1) as u32 * 8; 158 | let k = filter[filter.len() - 1]; 159 | let filter_adj = &filter[0..filter.len() - 1]; 160 | 161 | if k > 30 { 162 | return true; 163 | } 164 | 165 | let mut h = self.bloom_hash(key); 166 | let delta = (h >> 17) | (h << 15); 167 | for _ in 0..k { 168 | let bitpos = (h % bits) as usize; 169 | if (filter_adj[bitpos / 8] & (1 << (bitpos % 8))) == 0 { 170 | return false; 171 | } 172 | h = (h as u64 + delta as u64) as u32; 173 | } 174 | true 175 | } 176 | } 177 | 178 | /// A filter policy wrapping another policy; extracting the user key from internal keys for all 179 | /// operations. 180 | /// A User Key is u8*. 181 | /// An Internal Key is u8* u8{8} (where the second part encodes a tag and a sequence number). 
#[derive(Clone)]
pub struct InternalFilterPolicy<FP: FilterPolicy> {
    internal: FP,
}

impl<FP: FilterPolicy> InternalFilterPolicy<FP> {
    pub fn new(inner: FP) -> InternalFilterPolicy<FP> {
        InternalFilterPolicy { internal: inner }
    }
}

impl<FP: FilterPolicy> FilterPolicy for InternalFilterPolicy<FP> {
    fn name(&self) -> &'static str {
        self.internal.name()
    }

    fn create_filter(&self, keys: &[u8], key_offsets: &[usize]) -> Vec<u8> {
        // Strip the 8-byte tag from every internal key and hand the resulting
        // user keys to the wrapped policy.
        let mut mod_keys = Vec::with_capacity(keys.len() - key_offsets.len() * 8);
        let mut mod_key_offsets = Vec::with_capacity(key_offsets.len());

        offset_data_iterate(keys, key_offsets, |key| {
            mod_key_offsets.push(mod_keys.len());
            mod_keys.extend_from_slice(&key[0..key.len() - 8]);
        });
        self.internal.create_filter(&mod_keys, &mod_key_offsets)
    }

    fn key_may_match(&self, key: &[u8], filter: &[u8]) -> bool {
        // Match against the user key only (internal key minus the trailing tag).
        self.internal.key_may_match(&key[0..key.len() - 8], filter)
    }
}

/// offset_data_iterate iterates over the entries in data that are indexed by the offsets given in
/// offsets. This is e.g. the internal format of a FilterBlock.
fn offset_data_iterate<F: FnMut(&[u8])>(data: &[u8], offsets: &[usize], mut f: F) {
    for (ix, &start) in offsets.iter().enumerate() {
        // The last entry extends to the end of the data slice.
        let end = if ix + 1 < offsets.len() {
            offsets[ix + 1]
        } else {
            data.len()
        };
        f(&data[start..end]);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::key_types::LookupKey;

    const _BITS_PER_KEY: u32 = 12;

    /// Returns a few test keys concatenated, together with their offsets.
    fn input_data() -> (Vec<u8>, Vec<usize>) {
        let mut concat = vec![];
        let mut offs = vec![];

        for d in [
            "abc123def456".as_bytes(),
            "xxx111xxx222".as_bytes(),
            "ab00cd00ab".as_bytes(),
            "908070605040302010".as_bytes(),
        ]
        .iter()
        {
            offs.push(concat.len());
            concat.extend_from_slice(d);
        }
        (concat, offs)
    }

    /// Creates a filter using the keys from input_data().
    fn create_filter() -> Vec<u8> {
        let fpol = BloomPolicy::new(_BITS_PER_KEY);
        let (data, offs) = input_data();
        let filter = fpol.create_filter(&data, &offs);

        assert_eq!(filter, vec![194, 148, 129, 140, 192, 196, 132, 164, 8]);
        filter
    }

    /// Creates a filter using the keys from input_data() but converted to InternalKey format.
264 | fn create_internalkey_filter() -> Vec { 265 | let fpol = Rc::new(Box::new(InternalFilterPolicy::new(BloomPolicy::new( 266 | _BITS_PER_KEY, 267 | )))); 268 | let (data, offs) = input_data(); 269 | let (mut intdata, mut intoffs) = (vec![], vec![]); 270 | 271 | offset_data_iterate(&data, &offs, |key| { 272 | let ikey = LookupKey::new(key, 123); 273 | intoffs.push(intdata.len()); 274 | intdata.extend_from_slice(ikey.internal_key()); 275 | }); 276 | 277 | fpol.create_filter(&intdata, &intoffs) 278 | } 279 | 280 | #[test] 281 | fn test_filter_bloom() { 282 | let f = create_filter(); 283 | let fp = BloomPolicy::new(_BITS_PER_KEY); 284 | let (data, offs) = input_data(); 285 | 286 | offset_data_iterate(&data, &offs, |key| { 287 | assert!(fp.key_may_match(key, &f)); 288 | }); 289 | } 290 | 291 | /// This test verifies that InternalFilterPolicy works correctly. 292 | #[test] 293 | fn test_filter_internal_keys_identical() { 294 | assert_eq!(create_filter(), create_internalkey_filter()); 295 | } 296 | 297 | #[test] 298 | fn test_filter_bloom_hash() { 299 | let d1 = vec![0x62]; 300 | let d2 = vec![0xc3, 0x97]; 301 | let d3 = vec![0xe2, 0x99, 0xa5]; 302 | let d4 = vec![0xe1, 0x80, 0xb9, 0x32]; 303 | 304 | let fp = BloomPolicy::new_unwrapped(_BITS_PER_KEY); 305 | 306 | assert_eq!(fp.bloom_hash(&d1), 0xef1345c4); 307 | assert_eq!(fp.bloom_hash(&d2), 0x5b663814); 308 | assert_eq!(fp.bloom_hash(&d3), 0x323c078f); 309 | assert_eq!(fp.bloom_hash(&d4), 0xed21633a); 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /src/filter_block.rs: -------------------------------------------------------------------------------- 1 | use crate::block::BlockContents; 2 | use crate::filter::BoxedFilterPolicy; 3 | 4 | use std::rc::Rc; 5 | 6 | use integer_encoding::FixedInt; 7 | 8 | const FILTER_BASE_LOG2: u32 = 11; 9 | const FILTER_BASE: u32 = 1 << FILTER_BASE_LOG2; // 2 KiB 10 | 11 | /// For a given byte offset, returns the index of the filter 
/// that includes the key at that offset.
#[inline]
fn get_filter_index(offset: usize, base_lg2: u32) -> u32 {
    // A shift by base_lg2 is a division by FILTER_BASE (2048 by default).
    (offset >> base_lg2 as usize) as u32
}

/// A Filter Block is built like this:
///
/// [filter0, filter1, filter2, ..., offset of filter0, offset of filter1, ..., offset of offsets
/// array, log2 of FILTER_BASE]
///
/// where offsets are 4 bytes, offset of offsets is 4 bytes, and log2 of FILTER_BASE is 1 byte.
/// Two consecutive filter offsets may be the same.
pub struct FilterBlockBuilder {
    policy: BoxedFilterPolicy,
    // filters, concatenated
    filters: Vec<u8>,
    filter_offsets: Vec<usize>,

    // Reset on every start_block()
    key_offsets: Vec<usize>,
    keys: Vec<u8>,
}

impl FilterBlockBuilder {
    pub fn new(fp: BoxedFilterPolicy) -> FilterBlockBuilder {
        FilterBlockBuilder {
            policy: fp,
            // some pre-allocation
            filters: Vec::with_capacity(1024),
            filter_offsets: Vec::with_capacity(1024),
            key_offsets: Vec::with_capacity(1024),
            keys: Vec::with_capacity(1024),
        }
    }

    /// Estimated encoded size: filter data + 4 bytes per offset + footer (4 + 1 bytes).
    pub fn size_estimate(&self) -> usize {
        self.filters.len() + 4 * self.filter_offsets.len() + 4 + 1
    }

    pub fn filter_name(&self) -> &'static str {
        self.policy.name()
    }

    /// Registers a key for the filter covering the current block range.
    pub fn add_key(&mut self, key: &[u8]) {
        self.key_offsets.push(self.keys.len());
        self.keys.extend_from_slice(key);
    }

    pub fn start_block(&mut self, offset: usize) {
        let filter_ix = get_filter_index(offset, FILTER_BASE_LOG2);
        assert!(filter_ix >= self.filter_offsets.len() as u32);

        // Emit one filter for every FILTER_BASE range passed since the last call;
        // ranges without keys produce duplicate offsets.
        while filter_ix > self.filter_offsets.len() as u32 {
            self.generate_filter();
        }
    }

    /// Flushes the accumulated keys into a new filter and records its offset.
    fn generate_filter(&mut self) {
        self.filter_offsets.push(self.filters.len());
        if self.keys.is_empty() {
            // No keys in this range: only the (duplicate) offset is recorded.
            return;
        }

        // Generate a filter from the keys accumulated since the last flush.
        let filter = self.policy.create_filter(&self.keys, &self.key_offsets);
        self.filters.extend_from_slice(&filter);

        self.keys.clear();
        self.key_offsets.clear();
    }

    /// Consumes the builder and returns the serialized filter block.
    pub fn finish(mut self) -> Vec<u8> {
        if !self.keys.is_empty() {
            self.generate_filter();
        }

        let mut result = self.filters;
        let offsets_offset = result.len();
        let mut ix = result.len();
        result.resize(ix + 4 * self.filter_offsets.len() + 5, 0);

        // Put filter offsets at the end
        for offset in self.filter_offsets.into_iter() {
            (offset as u32).encode_fixed(&mut result[ix..ix + 4]);
            ix += 4;
        }

        // Footer: offset of the offsets array, then the base log2 byte.
        (offsets_offset as u32).encode_fixed(&mut result[ix..ix + 4]);
        ix += 4;
        result[ix] = FILTER_BASE_LOG2 as u8;

        result
    }
}

#[derive(Clone)]
pub struct FilterBlockReader {
    policy: BoxedFilterPolicy,
    block: Rc<BlockContents>,

    offsets_offset: usize,
    filter_base_lg2: u32,
}

impl FilterBlockReader {
    pub fn new_owned(pol: BoxedFilterPolicy, data: Vec<u8>) -> FilterBlockReader {
        FilterBlockReader::new(pol, Rc::new(data))
    }

    pub fn new(pol: BoxedFilterPolicy, data: Rc<Vec<u8>>) -> FilterBlockReader {
        // A valid block carries at least the 5-byte footer.
        assert!(data.len() >= 5);

        let fbase = data[data.len() - 1] as u32;
        let offset = u32::decode_fixed(&data[data.len() - 5..data.len() - 1]) as usize;

        FilterBlockReader {
            policy: pol,
            block: data,
            filter_base_lg2: fbase,
            offsets_offset: offset,
        }
    }

    /// Returns number of filters
    pub fn num(&self) -> u32 {
        ((self.block.len() - self.offsets_offset - 5) / 4) as u32
    }

    /// Returns the offset of the offset with index i.
    fn offset_of(&self, i: u32) -> usize {
        let offset_offset = self.offsets_offset + 4 * i as usize;
        u32::decode_fixed(&self.block[offset_offset..offset_offset + 4]) as usize
    }

    /// blk_offset is the offset of the block containing key.
Returns whether the key matches the 147 | /// filter for the block at blk_offset. 148 | pub fn key_may_match(&self, blk_offset: usize, key: &[u8]) -> bool { 149 | if get_filter_index(blk_offset, self.filter_base_lg2) > self.num() { 150 | return true; 151 | } 152 | 153 | let filter_begin = self.offset_of(get_filter_index(blk_offset, self.filter_base_lg2)); 154 | let filter_end = self.offset_of(get_filter_index(blk_offset, self.filter_base_lg2) + 1); 155 | 156 | assert!(filter_begin < filter_end); 157 | assert!(filter_end <= self.offsets_offset); 158 | 159 | self.policy 160 | .key_may_match(key, &self.block[filter_begin..filter_end]) 161 | } 162 | } 163 | 164 | #[cfg(test)] 165 | mod tests { 166 | use super::get_filter_index; 167 | use super::FILTER_BASE_LOG2; 168 | use super::*; 169 | use crate::filter::BloomPolicy; 170 | 171 | #[test] 172 | fn test_filter_index() { 173 | assert_eq!(get_filter_index(3777, FILTER_BASE_LOG2), 1); 174 | assert_eq!(get_filter_index(10000, FILTER_BASE_LOG2), 4); 175 | } 176 | 177 | fn get_keys() -> Vec<&'static [u8]> { 178 | vec![b"abcd", b"efgh", b"ijkl", b"mnopqrstuvwxyz"] 179 | } 180 | 181 | fn produce_filter_block() -> Vec { 182 | let keys = get_keys(); 183 | let mut bld = FilterBlockBuilder::new(Rc::new(Box::new(BloomPolicy::new(32)))); 184 | 185 | bld.start_block(0); 186 | 187 | for k in keys.iter() { 188 | bld.add_key(k); 189 | } 190 | 191 | // second block 192 | bld.start_block(5000); 193 | 194 | for k in keys.iter() { 195 | bld.add_key(k); 196 | } 197 | 198 | bld.finish() 199 | } 200 | 201 | #[test] 202 | fn test_filter_block_builder() { 203 | let result = produce_filter_block(); 204 | // 2 blocks of 4 filters of 4 bytes plus 1B for `k`; plus three filter offsets (because of 205 | // the block offsets of 0 and 5000); plus footer 206 | assert_eq!(result.len(), 2 * (get_keys().len() * 4 + 1) + (3 * 4) + 5); 207 | assert_eq!( 208 | result, 209 | vec![ 210 | 234, 195, 25, 155, 61, 141, 173, 140, 221, 28, 222, 92, 220, 112, 234, 227, 
22, 211 | 234, 195, 25, 155, 61, 141, 173, 140, 221, 28, 222, 92, 220, 112, 234, 227, 22, 0, 212 | 0, 0, 0, 17, 0, 0, 0, 17, 0, 0, 0, 34, 0, 0, 0, 11, 213 | ] 214 | ); 215 | } 216 | 217 | #[test] 218 | fn test_filter_block_build_read() { 219 | let result = produce_filter_block(); 220 | let reader = FilterBlockReader::new_owned(Rc::new(Box::new(BloomPolicy::new(32))), result); 221 | 222 | assert_eq!( 223 | reader.offset_of(get_filter_index(5121, FILTER_BASE_LOG2)), 224 | 17 225 | ); // third block in third filter 226 | 227 | let unknown_keys: [&[u8]; 3] = [b"xsb", b"9sad", b"assssaaaass"]; 228 | 229 | for block_offset in vec![0, 1024, 5000, 6025].into_iter() { 230 | for key in get_keys().iter() { 231 | assert!( 232 | reader.key_may_match(block_offset, key), 233 | "{} {:?} ", 234 | block_offset, 235 | key 236 | ); 237 | } 238 | for key in unknown_keys.iter() { 239 | assert!(!reader.key_may_match(block_offset, key)); 240 | } 241 | } 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /src/infolog.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, Write}; 2 | 3 | pub struct Logger(pub Box); 4 | 5 | pub fn stderr() -> Logger { 6 | Logger(Box::new(io::stderr())) 7 | } 8 | 9 | #[macro_export] 10 | macro_rules! 
log { 11 | ($l:expr) => ($l.as_ref().map(|l| l.borrow_mut().0.write("\n".as_bytes()).is_ok())); 12 | ($l:expr, $fmt:expr) => ( 13 | $l.as_ref().map(|l| l.borrow_mut().0.write(concat!($fmt, "\n").as_bytes()).is_ok())); 14 | ($l:expr, $fmt:expr, $($arg:tt)*) => ( 15 | $l.as_ref().map( 16 | |l| l.borrow_mut().0.write_fmt(format_args!(concat!($fmt, "\n"), $($arg)*)).is_ok())); 17 | } 18 | -------------------------------------------------------------------------------- /src/key_types.rs: -------------------------------------------------------------------------------- 1 | use crate::cmp::Cmp; 2 | use crate::types::SequenceNumber; 3 | 4 | use std::cmp::Ordering; 5 | use std::io::Write; 6 | 7 | use integer_encoding::{FixedInt, FixedIntWriter, VarInt, VarIntWriter}; 8 | 9 | // The following typedefs are used to distinguish between the different key formats used internally 10 | // by different modules. 11 | 12 | // TODO: At some point, convert those into actual types with conversions between them. That's a lot 13 | // of boilerplate, but increases type safety. 14 | 15 | #[derive(Debug, Clone, Copy, PartialOrd, PartialEq)] 16 | pub enum ValueType { 17 | TypeDeletion = 0, 18 | TypeValue = 1, 19 | } 20 | 21 | /// A MemtableKey consists of the following elements: [keylen, key, tag, (vallen, value)] where 22 | /// keylen is a varint32 encoding the length of key+tag. tag is a fixed 8 bytes segment encoding 23 | /// the entry type and the sequence number. vallen and value are optional components at the end. 24 | pub type MemtableKey<'a> = &'a [u8]; 25 | 26 | /// A UserKey is the actual key supplied by the calling application, without any internal 27 | /// decorations. 28 | pub type UserKey<'a> = &'a [u8]; 29 | 30 | /// An InternalKey consists of [key, tag], so it's basically a MemtableKey without the initial 31 | /// length specification. This type is used as item type of MemtableIterator, and as the key 32 | /// type of tables. 
33 | pub type InternalKey<'a> = &'a [u8]; 34 | 35 | /// A LookupKey is the first part of a memtable key, consisting of [keylen: varint32, key: *u8, 36 | /// tag: u64] 37 | /// keylen is the length of key plus 8 (for the tag; this for LevelDB compatibility) 38 | #[derive(Clone, Debug)] 39 | pub struct LookupKey { 40 | key: Vec, 41 | key_offset: usize, 42 | } 43 | 44 | const U64_SPACE: usize = 8; 45 | 46 | impl LookupKey { 47 | pub fn new(k: UserKey, s: SequenceNumber) -> LookupKey { 48 | LookupKey::new_full(k, s, ValueType::TypeValue) 49 | } 50 | 51 | pub fn new_full(k: UserKey, s: SequenceNumber, t: ValueType) -> LookupKey { 52 | let mut key = Vec::new(); 53 | let internal_keylen = k.len() + U64_SPACE; 54 | key.resize(k.len() + internal_keylen.required_space() + U64_SPACE, 0); 55 | 56 | { 57 | let mut writer = key.as_mut_slice(); 58 | writer 59 | .write_varint(internal_keylen) 60 | .expect("write to slice failed"); 61 | writer.write_all(k).expect("write to slice failed"); 62 | writer 63 | .write_fixedint(s << 8 | t as u64) 64 | .expect("write to slice failed"); 65 | } 66 | 67 | LookupKey { 68 | key, 69 | key_offset: internal_keylen.required_space(), 70 | } 71 | } 72 | 73 | /// Returns the full memtable-formatted key. 74 | pub fn memtable_key(&self) -> MemtableKey { 75 | self.key.as_slice() 76 | } 77 | 78 | /// Returns only the user key portion. 79 | pub fn user_key(&self) -> UserKey { 80 | &self.key[self.key_offset..self.key.len() - 8] 81 | } 82 | 83 | /// Returns key and tag. 84 | pub fn internal_key(&self) -> InternalKey { 85 | &self.key[self.key_offset..] 
86 | } 87 | } 88 | 89 | /// Parses a tag into (type, sequence number) 90 | pub fn parse_tag(tag: u64) -> (ValueType, u64) { 91 | let seq = tag >> 8; 92 | let typ = tag & 0xff; 93 | 94 | match typ { 95 | 0 => (ValueType::TypeDeletion, seq), 96 | 1 => (ValueType::TypeValue, seq), 97 | _ => (ValueType::TypeValue, seq), 98 | } 99 | } 100 | 101 | /// A memtable key is a bytestring containing (keylen, key, tag, vallen, val). This function 102 | /// builds such a key. It's called key because the underlying Map implementation will only be 103 | /// concerned with keys; the value field is not used (instead, the value is encoded in the key, 104 | /// and for lookups we just search for the next bigger entry). 105 | /// keylen is the length of key + 8 (to account for the tag) 106 | pub fn build_memtable_key(key: &[u8], value: &[u8], t: ValueType, seq: SequenceNumber) -> Vec { 107 | // We are using the original LevelDB approach here -- encoding key and value into the 108 | // key that is used for insertion into the SkipMap. 109 | // The format is: [key_size: varint32, key_data: [u8], flags: u64, value_size: varint32, 110 | // value_data: [u8]] 111 | 112 | let keysize = key.len() + U64_SPACE; 113 | let valsize = value.len(); 114 | let mut buf = 115 | vec![0_u8; keysize + valsize + keysize.required_space() + valsize.required_space()]; 116 | 117 | { 118 | let mut writer = buf.as_mut_slice(); 119 | writer.write_varint(keysize).expect("write to slice failed"); 120 | writer.write_all(key).expect("write to slice failed"); 121 | writer 122 | .write_fixedint((t as u64) | (seq << 8)) 123 | .expect("write to slice failed"); 124 | writer.write_varint(valsize).expect("write to slice failed"); 125 | writer.write_all(value).expect("write to slice failed"); 126 | assert_eq!(writer.len(), 0); 127 | } 128 | buf 129 | } 130 | 131 | /// Parses a memtable key and returns (keylen, key offset, tag, vallen, val offset). 
132 | /// If the key only contains (keylen, key, tag), the vallen and val offset return values will be 133 | /// meaningless. 134 | pub fn parse_memtable_key(mkey: MemtableKey) -> (usize, usize, u64, usize, usize) { 135 | let (keylen, mut i): (usize, usize) = VarInt::decode_var(mkey).unwrap(); 136 | let keyoff = i; 137 | i += keylen - 8; 138 | 139 | if mkey.len() > i { 140 | let tag = FixedInt::decode_fixed(&mkey[i..i + 8]); 141 | i += 8; 142 | let (vallen, j): (usize, usize) = VarInt::decode_var(&mkey[i..]).unwrap(); 143 | i += j; 144 | let valoff = i; 145 | (keylen - 8, keyoff, tag, vallen, valoff) 146 | } else { 147 | (keylen - 8, keyoff, 0, 0, 0) 148 | } 149 | } 150 | 151 | /// cmp_memtable_key efficiently compares two memtable keys by only parsing what's actually needed. 152 | pub fn cmp_memtable_key(ucmp: &dyn Cmp, a: MemtableKey<'_>, b: MemtableKey<'_>) -> Ordering { 153 | let (alen, aoff): (usize, usize) = VarInt::decode_var(a).unwrap(); 154 | let (blen, boff): (usize, usize) = VarInt::decode_var(b).unwrap(); 155 | let userkey_a = &a[aoff..aoff + alen - 8]; 156 | let userkey_b = &b[boff..boff + blen - 8]; 157 | 158 | match ucmp.cmp(userkey_a, userkey_b) { 159 | Ordering::Less => Ordering::Less, 160 | Ordering::Greater => Ordering::Greater, 161 | Ordering::Equal => { 162 | let atag = FixedInt::decode_fixed(&a[aoff + alen - 8..aoff + alen]); 163 | let btag = FixedInt::decode_fixed(&b[boff + blen - 8..boff + blen]); 164 | let (_, aseq) = parse_tag(atag); 165 | let (_, bseq) = parse_tag(btag); 166 | 167 | // reverse! 168 | bseq.cmp(&aseq) 169 | } 170 | } 171 | } 172 | 173 | /// Parse a key in InternalKey format. 
174 | pub fn parse_internal_key(ikey: InternalKey) -> (ValueType, SequenceNumber, UserKey) { 175 | if ikey.is_empty() { 176 | return (ValueType::TypeDeletion, 0, &ikey[0..0]); 177 | } 178 | assert!(ikey.len() >= 8); 179 | let (typ, seq) = parse_tag(FixedInt::decode_fixed(&ikey[ikey.len() - 8..])); 180 | (typ, seq, &ikey[0..ikey.len() - 8]) 181 | } 182 | 183 | /// cmp_internal_key efficiently compares keys in InternalKey format by only parsing the parts that 184 | /// are actually needed for a comparison. 185 | pub fn cmp_internal_key(ucmp: &dyn Cmp, a: InternalKey<'_>, b: InternalKey<'_>) -> Ordering { 186 | match ucmp.cmp(&a[0..a.len() - 8], &b[0..b.len() - 8]) { 187 | Ordering::Less => Ordering::Less, 188 | Ordering::Greater => Ordering::Greater, 189 | Ordering::Equal => { 190 | let seqa = parse_tag(FixedInt::decode_fixed(&a[a.len() - 8..])).1; 191 | let seqb = parse_tag(FixedInt::decode_fixed(&b[b.len() - 8..])).1; 192 | // reverse comparison! 193 | seqb.cmp(&seqa) 194 | } 195 | } 196 | } 197 | 198 | /// truncate_to_userkey performs an in-place conversion from InternalKey to UserKey format. 
199 | pub fn truncate_to_userkey(ikey: &mut Vec) { 200 | let len = ikey.len(); 201 | assert!(len >= 8); 202 | ikey.truncate(len - 8); 203 | } 204 | 205 | #[cfg(test)] 206 | mod tests { 207 | use super::*; 208 | 209 | #[test] 210 | fn test_memtable_lookupkey() { 211 | use integer_encoding::VarInt; 212 | 213 | let lk1 = LookupKey::new("abcde".as_bytes(), 123); 214 | let lk2 = LookupKey::new("xyabxy".as_bytes(), 97); 215 | 216 | // Assert correct allocation strategy 217 | assert_eq!(lk1.key.len(), 14); 218 | assert_eq!(lk1.key.capacity(), 14); 219 | 220 | assert_eq!(lk1.user_key(), "abcde".as_bytes()); 221 | assert_eq!(u32::decode_var(lk1.memtable_key()).unwrap(), (13, 1)); 222 | assert_eq!( 223 | lk2.internal_key(), 224 | vec![120, 121, 97, 98, 120, 121, 1, 97, 0, 0, 0, 0, 0, 0].as_slice() 225 | ); 226 | } 227 | 228 | #[test] 229 | fn test_build_memtable_key() { 230 | assert_eq!( 231 | build_memtable_key( 232 | "abc".as_bytes(), 233 | "123".as_bytes(), 234 | ValueType::TypeValue, 235 | 231 236 | ), 237 | vec![11, 97, 98, 99, 1, 231, 0, 0, 0, 0, 0, 0, 3, 49, 50, 51] 238 | ); 239 | assert_eq!( 240 | build_memtable_key("".as_bytes(), "123".as_bytes(), ValueType::TypeValue, 231), 241 | vec![8, 1, 231, 0, 0, 0, 0, 0, 0, 3, 49, 50, 51] 242 | ); 243 | assert_eq!( 244 | build_memtable_key( 245 | "abc".as_bytes(), 246 | "123".as_bytes(), 247 | ValueType::TypeDeletion, 248 | 231 249 | ), 250 | vec![11, 97, 98, 99, 0, 231, 0, 0, 0, 0, 0, 0, 3, 49, 50, 51] 251 | ); 252 | assert_eq!( 253 | build_memtable_key( 254 | "abc".as_bytes(), 255 | "".as_bytes(), 256 | ValueType::TypeDeletion, 257 | 231 258 | ), 259 | vec![11, 97, 98, 99, 0, 231, 0, 0, 0, 0, 0, 0, 0] 260 | ); 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! rusty-leveldb is a reimplementation of LevelDB in pure rust. It depends only on a few crates, 2 | //! 
and is very close to the original, implementation-wise. The external API is relatively small 3 | //! and should be easy to use. 4 | //! 5 | //! ``` 6 | //! use rusty_leveldb::{DB, DBIterator, LdbIterator, Options}; 7 | //! 8 | //! let opt = rusty_leveldb::in_memory(); 9 | //! let mut db = DB::open("mydatabase", opt).unwrap(); 10 | //! 11 | //! db.put(b"Hello", b"World").unwrap(); 12 | //! assert_eq!(b"World", db.get(b"Hello").unwrap().as_slice()); 13 | //! 14 | //! let mut iter = db.new_iter().unwrap(); 15 | //! // Note: For efficiency reasons, it's recommended to use advance() and current() instead of 16 | //! // next() when iterating over many elements. 17 | //! assert_eq!((b"Hello".to_vec(), b"World".to_vec()), iter.next().unwrap()); 18 | //! 19 | //! db.delete(b"Hello").unwrap(); 20 | //! db.flush().unwrap(); 21 | //! ``` 22 | //! 23 | 24 | #![allow(dead_code)] 25 | 26 | #[cfg(feature = "fs")] 27 | extern crate errno; 28 | 29 | #[cfg(feature = "fs")] 30 | extern crate fs2; 31 | 32 | extern crate integer_encoding; 33 | extern crate rand; 34 | extern crate snap; 35 | 36 | #[cfg(test)] 37 | #[macro_use] 38 | extern crate time_test; 39 | 40 | #[macro_use] 41 | pub mod infolog; 42 | 43 | #[cfg(any(feature = "asyncdb-tokio", feature = "asyncdb-async-std"))] 44 | mod asyncdb; 45 | 46 | #[cfg(feature = "asyncdb-tokio")] 47 | mod asyncdb_tokio; 48 | #[cfg(feature = "asyncdb-tokio")] 49 | use asyncdb_tokio::{send_response, send_response_result, Message}; 50 | 51 | #[cfg(feature = "asyncdb-async-std")] 52 | mod asyncdb_async_std; 53 | #[cfg(feature = "asyncdb-async-std")] 54 | use asyncdb_async_std::{send_response, send_response_result, Message}; 55 | 56 | mod block; 57 | mod block_builder; 58 | mod blockhandle; 59 | mod cache; 60 | mod cmp; 61 | mod crc; 62 | 63 | #[cfg(feature = "fs")] 64 | mod disk_env; 65 | 66 | mod env_common; 67 | mod error; 68 | mod filter; 69 | mod filter_block; 70 | mod key_types; 71 | mod log; 72 | mod mem_env; 73 | mod memtable; 74 | mod 
merging_iter; 75 | mod options; 76 | mod skipmap; 77 | mod snapshot; 78 | mod table_block; 79 | mod table_builder; 80 | mod table_cache; 81 | mod table_reader; 82 | mod test_util; 83 | mod types; 84 | mod version; 85 | mod version_edit; 86 | mod version_set; 87 | mod write_batch; 88 | 89 | mod db_impl; 90 | mod db_iter; 91 | 92 | pub mod compressor; 93 | pub mod env; 94 | 95 | #[cfg(feature = "asyncdb-async-std")] 96 | pub use asyncdb_async_std::AsyncDB; 97 | #[cfg(feature = "asyncdb-tokio")] 98 | pub use asyncdb_tokio::AsyncDB; 99 | pub use cmp::{Cmp, DefaultCmp}; 100 | pub use compressor::{Compressor, CompressorId}; 101 | pub use db_impl::DB; 102 | pub use db_iter::DBIterator; 103 | 104 | #[cfg(feature = "fs")] 105 | pub use disk_env::PosixDiskEnv; 106 | 107 | pub use error::{Result, Status, StatusCode}; 108 | pub use filter::{BloomPolicy, FilterPolicy}; 109 | pub use mem_env::MemEnv; 110 | pub use options::{in_memory, CompressorList, Options}; 111 | pub use skipmap::SkipMap; 112 | pub use types::LdbIterator; 113 | pub use write_batch::WriteBatch; 114 | -------------------------------------------------------------------------------- /src/log.rs: -------------------------------------------------------------------------------- 1 | //! A log consists of a number of blocks. 2 | //! A block consists of a number of records and an optional trailer (filler). 3 | //! A record is a bytestring: [checksum: uint32, length: uint16, type: uint8, data: [u8]] 4 | //! 
checksum is the crc32 sum of type and data; type is one of RecordType::{Full/First/Middle/Last} 5 | 6 | use crate::crc; 7 | use crate::error::{err, Result, StatusCode}; 8 | 9 | use std::io::{Read, Write}; 10 | 11 | use integer_encoding::FixedInt; 12 | use integer_encoding::FixedIntWriter; 13 | 14 | const BLOCK_SIZE: usize = 32 * 1024; 15 | const HEADER_SIZE: usize = 4 + 2 + 1; 16 | 17 | #[derive(Clone, Copy)] 18 | pub enum RecordType { 19 | Full = 1, 20 | First = 2, 21 | Middle = 3, 22 | Last = 4, 23 | } 24 | 25 | pub struct LogWriter { 26 | dst: W, 27 | current_block_offset: usize, 28 | block_size: usize, 29 | } 30 | 31 | impl LogWriter { 32 | pub fn new(writer: W) -> LogWriter { 33 | LogWriter { 34 | dst: writer, 35 | current_block_offset: 0, 36 | block_size: BLOCK_SIZE, 37 | } 38 | } 39 | 40 | /// new_with_off opens a writer starting at some offset of an existing log file. The file must 41 | /// have the default block size. 42 | pub fn new_with_off(writer: W, off: usize) -> LogWriter { 43 | let mut w = LogWriter::new(writer); 44 | w.current_block_offset = off % BLOCK_SIZE; 45 | w 46 | } 47 | 48 | pub fn add_record(&mut self, mut record: &[u8]) -> Result { 49 | let mut first_frag = true; 50 | let mut result = Ok(0); 51 | while result.is_ok() && !record.is_empty() { 52 | assert!(self.block_size > HEADER_SIZE); 53 | 54 | let space_left = self.block_size - self.current_block_offset; 55 | 56 | // Fill up block; go to next block. 
57 | if space_left < HEADER_SIZE { 58 | self.dst.write_all(&vec![0, 0, 0, 0, 0, 0][0..space_left])?; 59 | self.current_block_offset = 0; 60 | } 61 | 62 | let avail_for_data = self.block_size - self.current_block_offset - HEADER_SIZE; 63 | 64 | let data_frag_len = if record.len() < avail_for_data { 65 | record.len() 66 | } else { 67 | avail_for_data 68 | }; 69 | 70 | let recordtype; 71 | 72 | if first_frag && data_frag_len == record.len() { 73 | recordtype = RecordType::Full; 74 | } else if first_frag { 75 | recordtype = RecordType::First; 76 | } else if data_frag_len == record.len() { 77 | recordtype = RecordType::Last; 78 | } else { 79 | recordtype = RecordType::Middle; 80 | } 81 | 82 | result = self.emit_record(recordtype, record, data_frag_len); 83 | record = &record[data_frag_len..]; 84 | first_frag = false; 85 | } 86 | result 87 | } 88 | 89 | fn emit_record(&mut self, t: RecordType, data: &[u8], len: usize) -> Result { 90 | assert!(len < 256 * 256); 91 | 92 | let mut digest = crc::digest(); 93 | digest.update(&[t as u8]); 94 | digest.update(&data[0..len]); 95 | 96 | let chksum = mask_crc(digest.finalize()); 97 | 98 | let mut s = 0; 99 | s += self.dst.write(&chksum.encode_fixed_vec())?; 100 | s += self.dst.write_fixedint(len as u16)?; 101 | s += self.dst.write(&[t as u8])?; 102 | s += self.dst.write(&data[0..len])?; 103 | 104 | self.current_block_offset += s; 105 | Ok(s) 106 | } 107 | 108 | pub fn flush(&mut self) -> Result<()> { 109 | self.dst.flush()?; 110 | Ok(()) 111 | } 112 | } 113 | 114 | pub struct LogReader { 115 | // TODO: Wrap src in a buffer to enhance read performance. 
116 | src: R, 117 | blk_off: usize, 118 | blocksize: usize, 119 | head_scratch: [u8; 7], 120 | checksums: bool, 121 | } 122 | 123 | impl LogReader { 124 | pub fn new(src: R, chksum: bool) -> LogReader { 125 | LogReader { 126 | src, 127 | blk_off: 0, 128 | blocksize: BLOCK_SIZE, 129 | checksums: chksum, 130 | head_scratch: [0; 7], 131 | } 132 | } 133 | 134 | /// EOF is signalled by Ok(0) 135 | pub fn read(&mut self, dst: &mut Vec) -> Result { 136 | let mut checksum: u32; 137 | let mut length: u16; 138 | let mut typ: u8; 139 | let mut dst_offset: usize = 0; 140 | 141 | dst.clear(); 142 | 143 | loop { 144 | if self.blocksize - self.blk_off < HEADER_SIZE { 145 | // skip to next block 146 | self.src 147 | .read_exact(&mut self.head_scratch[0..self.blocksize - self.blk_off])?; 148 | self.blk_off = 0; 149 | } 150 | 151 | let mut bytes_read = self.src.read(&mut self.head_scratch)?; 152 | 153 | // EOF 154 | if bytes_read == 0 { 155 | return Ok(0); 156 | } 157 | 158 | self.blk_off += bytes_read; 159 | 160 | checksum = u32::decode_fixed(&self.head_scratch[0..4]); 161 | length = u16::decode_fixed(&self.head_scratch[4..6]); 162 | typ = self.head_scratch[6]; 163 | 164 | dst.resize(dst_offset + length as usize, 0); 165 | bytes_read = self 166 | .src 167 | .read(&mut dst[dst_offset..dst_offset + length as usize])?; 168 | self.blk_off += bytes_read; 169 | 170 | if self.checksums 171 | && !self.check_integrity(typ, &dst[dst_offset..dst_offset + bytes_read], checksum) 172 | { 173 | return err(StatusCode::Corruption, "Invalid Checksum"); 174 | } 175 | 176 | dst_offset += length as usize; 177 | 178 | if typ == RecordType::Full as u8 { 179 | return Ok(dst_offset); 180 | } else if typ == RecordType::First as u8 { 181 | continue; 182 | } else if typ == RecordType::Middle as u8 { 183 | continue; 184 | } else if typ == RecordType::Last as u8 { 185 | return Ok(dst_offset); 186 | } 187 | } 188 | } 189 | 190 | fn check_integrity(&mut self, typ: u8, data: &[u8], expected: u32) -> bool { 191 | 
let mut digest = crc::digest(); 192 | digest.update(&[typ]); 193 | digest.update(data); 194 | unmask_crc(expected) == digest.finalize() 195 | } 196 | } 197 | 198 | const MASK_DELTA: u32 = 0xa282ead8; 199 | 200 | pub fn mask_crc(c: u32) -> u32 { 201 | (c.wrapping_shr(15) | c.wrapping_shl(17)).wrapping_add(MASK_DELTA) 202 | } 203 | 204 | pub fn unmask_crc(mc: u32) -> u32 { 205 | let rot = mc.wrapping_sub(MASK_DELTA); 206 | rot.wrapping_shr(17) | rot.wrapping_shl(15) 207 | } 208 | 209 | #[cfg(test)] 210 | mod tests { 211 | use super::*; 212 | use std::io::Cursor; 213 | 214 | #[test] 215 | fn test_crc_mask_crc() { 216 | let mut digest = crc::digest(); 217 | digest.update("abcde".as_bytes()); 218 | let sum = digest.finalize(); 219 | assert_eq!(sum, unmask_crc(mask_crc(sum))); 220 | assert!(sum != mask_crc(sum)); 221 | } 222 | 223 | #[test] 224 | fn test_crc_sanity() { 225 | assert_eq!(0x8a9136aa, crc::crc32([0_u8; 32])); 226 | assert_eq!(0x62a8ab43, crc::crc32([0xff_u8; 32])); 227 | } 228 | 229 | #[test] 230 | fn test_writer() { 231 | let data = &[ 232 | "hello world. My first log entry.", 233 | "and my second", 234 | "and my third", 235 | ]; 236 | let mut lw = LogWriter::new(Vec::new()); 237 | let total_len = data.iter().fold(0, |l, d| l + d.len()); 238 | 239 | for d in data { 240 | let _ = lw.add_record(d.as_bytes()); 241 | } 242 | 243 | assert_eq!(lw.current_block_offset, total_len + 3 * super::HEADER_SIZE); 244 | } 245 | 246 | #[test] 247 | fn test_writer_append() { 248 | let data = &[ 249 | "hello world. My first log entry.", 250 | "and my second", 251 | "and my third", 252 | ]; 253 | 254 | let mut dst = vec![0_u8; 1024]; 255 | 256 | { 257 | let mut lw = LogWriter::new(Cursor::new(dst.as_mut_slice())); 258 | for d in data { 259 | let _ = lw.add_record(d.as_bytes()); 260 | } 261 | } 262 | 263 | let old = dst.clone(); 264 | 265 | // Ensure that new_with_off positions the writer correctly. Some ugly mucking about with 266 | // cursors and stuff is required. 
267 | { 268 | let offset = data[0].len() + super::HEADER_SIZE; 269 | let mut lw = 270 | LogWriter::new_with_off(Cursor::new(&mut dst.as_mut_slice()[offset..]), offset); 271 | for d in &data[1..] { 272 | let _ = lw.add_record(d.as_bytes()); 273 | } 274 | } 275 | assert_eq!(old, dst); 276 | } 277 | 278 | #[test] 279 | fn test_reader() { 280 | let data = [ 281 | "abcdefghi".as_bytes().to_vec(), // fits one block of 17 282 | "123456789012".as_bytes().to_vec(), // spans two blocks of 17 283 | "0101010101010101010101".as_bytes().to_vec(), 284 | ]; // spans three blocks of 17 285 | let mut lw = LogWriter::new(Vec::new()); 286 | lw.block_size = super::HEADER_SIZE + 10; 287 | 288 | for e in data.iter() { 289 | assert!(lw.add_record(e).is_ok()); 290 | } 291 | 292 | assert_eq!(lw.dst.len(), 93); 293 | // Corrupt first record. 294 | lw.dst[2] += 1; 295 | 296 | let mut lr = LogReader::new(lw.dst.as_slice(), true); 297 | lr.blocksize = super::HEADER_SIZE + 10; 298 | let mut dst = Vec::with_capacity(128); 299 | 300 | // First record is corrupted. 
301 | assert_eq!( 302 | err(StatusCode::Corruption, "Invalid Checksum"), 303 | lr.read(&mut dst) 304 | ); 305 | 306 | let mut i = 1; 307 | loop { 308 | let r = lr.read(&mut dst); 309 | 310 | if r.is_err() { 311 | panic!("{}", r.unwrap_err()); 312 | } else if r.unwrap() == 0 { 313 | break; 314 | } 315 | 316 | assert_eq!(dst, data[i]); 317 | i += 1; 318 | } 319 | assert_eq!(i, data.len()); 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /src/memtable.rs: -------------------------------------------------------------------------------- 1 | use crate::cmp::{Cmp, MemtableKeyCmp}; 2 | use crate::key_types::{build_memtable_key, parse_internal_key, parse_memtable_key, ValueType}; 3 | use crate::key_types::{LookupKey, UserKey}; 4 | use crate::skipmap::{SkipMap, SkipMapIter}; 5 | use crate::types::{current_key_val, LdbIterator, SequenceNumber}; 6 | 7 | use std::rc::Rc; 8 | 9 | use integer_encoding::FixedInt; 10 | 11 | /// Provides Insert/Get/Iterate, based on the SkipMap implementation. 12 | /// MemTable uses MemtableKeys internally, that is, it stores key and value in the [Skipmap] key. 13 | pub struct MemTable { 14 | map: SkipMap, 15 | } 16 | 17 | impl MemTable { 18 | /// Returns a new MemTable. 19 | /// This wraps opt.cmp inside a MemtableKey-specific comparator. 20 | pub fn new(cmp: Rc>) -> MemTable { 21 | MemTable::new_raw(Rc::new(Box::new(MemtableKeyCmp(cmp)))) 22 | } 23 | 24 | /// Doesn't wrap the comparator in a MemtableKeyCmp. 
25 | fn new_raw(cmp: Rc>) -> MemTable { 26 | MemTable { 27 | map: SkipMap::new(cmp), 28 | } 29 | } 30 | 31 | pub fn len(&self) -> usize { 32 | self.map.len() 33 | } 34 | 35 | pub fn approx_mem_usage(&self) -> usize { 36 | self.map.approx_memory() 37 | } 38 | 39 | pub fn add(&mut self, seq: SequenceNumber, t: ValueType, key: UserKey<'_>, value: &[u8]) { 40 | self.map 41 | .insert(build_memtable_key(key, value, t, seq), Vec::new()) 42 | } 43 | 44 | /// get returns the value for the given entry and whether the entry is marked as deleted. This 45 | /// is to distinguish between not-found and found-deleted. 46 | #[allow(unused_variables)] 47 | pub fn get(&self, key: &LookupKey) -> (Option>, bool) { 48 | let mut iter = self.map.iter(); 49 | iter.seek(key.memtable_key()); 50 | 51 | if let Some((foundkey, _)) = current_key_val(&iter) { 52 | let (fkeylen, fkeyoff, tag, vallen, valoff) = parse_memtable_key(&foundkey); 53 | 54 | // Compare user key -- if equal, proceed 55 | // We only care about user key equality here 56 | if key.user_key() == &foundkey[fkeyoff..fkeyoff + fkeylen] { 57 | if tag & 0xff == ValueType::TypeValue as u64 { 58 | return (Some(foundkey[valoff..valoff + vallen].to_vec()), false); 59 | } else { 60 | return (None, true); 61 | } 62 | } 63 | } 64 | (None, false) 65 | } 66 | 67 | pub fn iter(&self) -> MemtableIterator { 68 | MemtableIterator { 69 | skipmapiter: self.map.iter(), 70 | } 71 | } 72 | } 73 | 74 | /// MemtableIterator is an iterator over a MemTable. It is mostly concerned with converting to and 75 | /// from the MemtableKey format used in the inner map; all key-taking or -returning methods deal 76 | /// with InternalKeys. 77 | /// 78 | /// This iterator does not skip deleted entries. 
79 | pub struct MemtableIterator { 80 | skipmapiter: SkipMapIter, 81 | } 82 | 83 | impl LdbIterator for MemtableIterator { 84 | fn advance(&mut self) -> bool { 85 | if !self.skipmapiter.advance() { 86 | return false; 87 | } 88 | self.skipmapiter.valid() 89 | } 90 | fn reset(&mut self) { 91 | self.skipmapiter.reset(); 92 | } 93 | fn prev(&mut self) -> bool { 94 | // Make sure this is actually needed (skipping deleted values?). 95 | let (mut key, mut val) = (vec![], vec![]); 96 | loop { 97 | if !self.skipmapiter.prev() { 98 | return false; 99 | } 100 | if self.skipmapiter.current(&mut key, &mut val) { 101 | let (_, _, tag, _, _) = parse_memtable_key(&key); 102 | 103 | if tag & 0xff == ValueType::TypeValue as u64 { 104 | return true; 105 | } else { 106 | continue; 107 | } 108 | } else { 109 | return false; 110 | } 111 | } 112 | } 113 | fn valid(&self) -> bool { 114 | self.skipmapiter.valid() 115 | } 116 | /// current places the current key (in InternalKey format) and value into the supplied vectors. 117 | fn current(&self, key: &mut Vec, val: &mut Vec) -> bool { 118 | if !self.valid() { 119 | return false; 120 | } 121 | 122 | if self.skipmapiter.current(key, val) { 123 | let (keylen, keyoff, _, vallen, valoff) = parse_memtable_key(key); 124 | val.clear(); 125 | val.extend_from_slice(&key[valoff..valoff + vallen]); 126 | // zero-allocation truncation. 127 | shift_left(key, keyoff); 128 | // Truncate key to key+tag. 129 | key.truncate(keylen + u64::required_space()); 130 | true 131 | } else { 132 | panic!("should not happen"); 133 | } 134 | } 135 | /// seek takes an InternalKey. 136 | fn seek(&mut self, to: &[u8]) { 137 | // Assemble the correct memtable key from the supplied InternalKey. 138 | let (_, seq, ukey) = parse_internal_key(to); 139 | self.skipmapiter 140 | .seek(LookupKey::new(ukey, seq).memtable_key()); 141 | } 142 | } 143 | 144 | /// shift_left moves s[mid..] to s[0..s.len()-mid]. The new size is s.len()-mid. 
145 | fn shift_left(s: &mut Vec, mid: usize) { 146 | for i in mid..s.len() { 147 | s.swap(i, i - mid); 148 | } 149 | let newlen = s.len() - mid; 150 | s.truncate(newlen); 151 | } 152 | 153 | #[cfg(test)] 154 | #[allow(unused_variables)] 155 | mod tests { 156 | use super::*; 157 | use crate::key_types::{parse_tag, truncate_to_userkey}; 158 | use crate::options; 159 | use crate::test_util::{test_iterator_properties, LdbIteratorIter}; 160 | 161 | #[test] 162 | fn test_shift_left() { 163 | let mut v = vec![1, 2, 3, 4, 5]; 164 | shift_left(&mut v, 1); 165 | assert_eq!(v, vec![2, 3, 4, 5]); 166 | 167 | let mut v = vec![1, 2, 3, 4, 5]; 168 | shift_left(&mut v, 4); 169 | assert_eq!(v, vec![5]); 170 | } 171 | 172 | fn get_memtable() -> MemTable { 173 | let mut mt = MemTable::new(options::for_test().cmp); 174 | let entries = vec![ 175 | (ValueType::TypeValue, 115, "abc", "122"), 176 | (ValueType::TypeValue, 120, "abc", "123"), 177 | (ValueType::TypeValue, 121, "abd", "124"), 178 | (ValueType::TypeDeletion, 122, "abe", "125"), 179 | (ValueType::TypeValue, 123, "abf", "126"), 180 | ]; 181 | 182 | for e in entries.iter() { 183 | mt.add(e.1, e.0, e.2.as_bytes(), e.3.as_bytes()); 184 | } 185 | mt 186 | } 187 | 188 | #[test] 189 | fn test_memtable_parse_tag() { 190 | let tag = (12345 << 8) | 1; 191 | assert_eq!(parse_tag(tag), (ValueType::TypeValue, 12345)); 192 | } 193 | 194 | #[test] 195 | fn test_memtable_add() { 196 | let mut mt = MemTable::new(options::for_test().cmp); 197 | mt.add( 198 | 123, 199 | ValueType::TypeValue, 200 | "abc".as_bytes(), 201 | "123".as_bytes(), 202 | ); 203 | 204 | assert_eq!( 205 | mt.map.iter().next().unwrap().0, 206 | &[11, 97, 98, 99, 1, 123, 0, 0, 0, 0, 0, 0, 3, 49, 50, 51] 207 | ); 208 | assert_eq!( 209 | mt.iter().next().unwrap().0, 210 | &[97, 98, 99, 1, 123, 0, 0, 0, 0, 0, 0] 211 | ); 212 | } 213 | 214 | #[test] 215 | fn test_memtable_add_get() { 216 | let mt = get_memtable(); 217 | 218 | // Smaller sequence number doesn't find entry 219 | if 
let Some(v) = mt.get(&LookupKey::new("abc".as_bytes(), 110)).0 { 220 | eprintln!("{:?}", v); 221 | panic!("found"); 222 | } 223 | 224 | if let Some(v) = mt.get(&LookupKey::new("abf".as_bytes(), 110)).0 { 225 | eprintln!("{:?}", v); 226 | panic!("found"); 227 | } 228 | 229 | // Bigger sequence number falls back to next smaller 230 | if let Some(v) = mt.get(&LookupKey::new("abc".as_bytes(), 116)).0 { 231 | assert_eq!(v, "122".as_bytes()); 232 | } else { 233 | panic!("not found"); 234 | } 235 | 236 | // Exact match works 237 | if let (Some(v), deleted) = mt.get(&LookupKey::new("abc".as_bytes(), 120)) { 238 | assert_eq!(v, "123".as_bytes()); 239 | assert!(!deleted); 240 | } else { 241 | panic!("not found"); 242 | } 243 | 244 | if let (None, deleted) = mt.get(&LookupKey::new("abe".as_bytes(), 122)) { 245 | assert!(deleted); 246 | } else { 247 | panic!("found deleted"); 248 | } 249 | 250 | if let Some(v) = mt.get(&LookupKey::new("abf".as_bytes(), 129)).0 { 251 | assert_eq!(v, "126".as_bytes()); 252 | } else { 253 | panic!("not found"); 254 | } 255 | } 256 | 257 | #[test] 258 | fn test_memtable_iterator_init() { 259 | let mt = get_memtable(); 260 | let mut iter = mt.iter(); 261 | 262 | assert!(!iter.valid()); 263 | iter.next(); 264 | assert!(iter.valid()); 265 | assert_eq!( 266 | current_key_val(&iter).unwrap().0, 267 | vec![97, 98, 99, 1, 120, 0, 0, 0, 0, 0, 0].as_slice() 268 | ); 269 | iter.reset(); 270 | assert!(!iter.valid()); 271 | } 272 | 273 | #[test] 274 | fn test_memtable_iterator_seek() { 275 | let mt = get_memtable(); 276 | let mut iter = mt.iter(); 277 | 278 | assert!(!iter.valid()); 279 | 280 | iter.seek(LookupKey::new("abc".as_bytes(), 400).internal_key()); 281 | let (mut gotkey, gotval) = current_key_val(&iter).unwrap(); 282 | truncate_to_userkey(&mut gotkey); 283 | assert_eq!( 284 | ("abc".as_bytes(), "123".as_bytes()), 285 | (gotkey.as_slice(), gotval.as_slice()) 286 | ); 287 | 288 | iter.seek(LookupKey::new("xxx".as_bytes(), 400).internal_key()); 289 | 
assert!(!iter.valid()); 290 | 291 | iter.seek(LookupKey::new("abd".as_bytes(), 400).internal_key()); 292 | let (mut gotkey, gotval) = current_key_val(&iter).unwrap(); 293 | truncate_to_userkey(&mut gotkey); 294 | assert_eq!( 295 | ("abd".as_bytes(), "124".as_bytes()), 296 | (gotkey.as_slice(), gotval.as_slice()) 297 | ); 298 | } 299 | 300 | #[test] 301 | fn test_memtable_iterator_fwd() { 302 | let mt = get_memtable(); 303 | let mut iter = mt.iter(); 304 | 305 | let expected = [ 306 | "123".as_bytes(), /* i.e., the abc entry with 307 | * higher sequence number comes first */ 308 | "122".as_bytes(), 309 | "124".as_bytes(), 310 | // deleted entry: 311 | "125".as_bytes(), 312 | "126".as_bytes(), 313 | ]; 314 | for (i, (k, v)) in LdbIteratorIter::wrap(&mut iter).enumerate() { 315 | assert_eq!(v, expected[i]); 316 | } 317 | } 318 | 319 | #[test] 320 | fn test_memtable_iterator_reverse() { 321 | let mt = get_memtable(); 322 | let mut iter = mt.iter(); 323 | 324 | // Bigger sequence number comes first 325 | iter.next(); 326 | assert!(iter.valid()); 327 | assert_eq!( 328 | current_key_val(&iter).unwrap().0, 329 | vec![97, 98, 99, 1, 120, 0, 0, 0, 0, 0, 0].as_slice() 330 | ); 331 | 332 | iter.next(); 333 | assert!(iter.valid()); 334 | assert_eq!( 335 | current_key_val(&iter).unwrap().0, 336 | vec![97, 98, 99, 1, 115, 0, 0, 0, 0, 0, 0].as_slice() 337 | ); 338 | 339 | iter.next(); 340 | assert!(iter.valid()); 341 | assert_eq!( 342 | current_key_val(&iter).unwrap().0, 343 | vec![97, 98, 100, 1, 121, 0, 0, 0, 0, 0, 0].as_slice() 344 | ); 345 | 346 | iter.prev(); 347 | assert!(iter.valid()); 348 | assert_eq!( 349 | current_key_val(&iter).unwrap().0, 350 | vec![97, 98, 99, 1, 115, 0, 0, 0, 0, 0, 0].as_slice() 351 | ); 352 | 353 | iter.prev(); 354 | assert!(iter.valid()); 355 | assert_eq!( 356 | current_key_val(&iter).unwrap().0, 357 | vec![97, 98, 99, 1, 120, 0, 0, 0, 0, 0, 0].as_slice() 358 | ); 359 | 360 | iter.prev(); 361 | assert!(!iter.valid()); 362 | } 363 | 364 | #[test] 
365 | fn test_memtable_parse_key() { 366 | let key = vec![11, 1, 2, 3, 1, 123, 0, 0, 0, 0, 0, 0, 3, 4, 5, 6]; 367 | let (keylen, keyoff, tag, vallen, valoff) = parse_memtable_key(&key); 368 | assert_eq!(keylen, 3); 369 | assert_eq!(&key[keyoff..keyoff + keylen], vec![1, 2, 3].as_slice()); 370 | assert_eq!(tag, 123 << 8 | 1); 371 | assert_eq!(vallen, 3); 372 | assert_eq!(&key[valoff..valoff + vallen], vec![4, 5, 6].as_slice()); 373 | } 374 | 375 | #[test] 376 | fn test_memtable_iterator_behavior() { 377 | let mut mt = MemTable::new(options::for_test().cmp); 378 | let entries = [ 379 | (115, "abc", "122"), 380 | (120, "abd", "123"), 381 | (121, "abe", "124"), 382 | (123, "abf", "126"), 383 | ]; 384 | 385 | for e in entries.iter() { 386 | mt.add(e.0, ValueType::TypeValue, e.1.as_bytes(), e.2.as_bytes()); 387 | } 388 | 389 | test_iterator_properties(mt.iter()); 390 | } 391 | } 392 | -------------------------------------------------------------------------------- /src/merging_iter.rs: -------------------------------------------------------------------------------- 1 | use crate::cmp::Cmp; 2 | use crate::types::{current_key_val, Direction, LdbIterator}; 3 | 4 | use std::cmp::Ordering; 5 | use std::rc::Rc; 6 | 7 | // Warning: This module is kinda messy. The original implementation is 8 | // not that much better though :-) 9 | // 10 | // Issues: 1) prev() may not work correctly at the beginning of a merging 11 | // iterator. 12 | 13 | #[derive(PartialEq)] 14 | enum SL { 15 | Smallest, 16 | Largest, 17 | } 18 | 19 | pub struct MergingIter { 20 | iters: Vec>, 21 | current: Option, 22 | direction: Direction, 23 | cmp: Rc>, 24 | } 25 | 26 | impl MergingIter { 27 | /// Construct a new merging iterator. 
28 | pub fn new(cmp: Rc>, iters: Vec>) -> MergingIter { 29 | MergingIter { 30 | iters, 31 | current: None, 32 | direction: Direction::Forward, 33 | cmp, 34 | } 35 | } 36 | 37 | fn init(&mut self) { 38 | for i in 0..self.iters.len() { 39 | self.iters[i].reset(); 40 | self.iters[i].advance(); 41 | if !self.iters[i].valid() { 42 | self.iters[i].reset() 43 | } 44 | } 45 | self.find_smallest(); 46 | } 47 | 48 | /// Adjusts the direction of the iterator depending on whether the last 49 | /// call was next() or prev(). This basically sets all iterators to one 50 | /// entry after (Forward) or one entry before (Reverse) the current() entry. 51 | fn update_direction(&mut self, d: Direction) { 52 | if self.direction == d { 53 | return; 54 | } 55 | 56 | let mut keybuf = vec![]; 57 | let mut valbuf = vec![]; 58 | 59 | if let Some((key, _)) = current_key_val(self) { 60 | if let Some(current) = self.current { 61 | match d { 62 | Direction::Forward if self.direction == Direction::Reverse => { 63 | self.direction = Direction::Forward; 64 | for i in 0..self.iters.len() { 65 | if i != current { 66 | self.iters[i].seek(&key); 67 | // This doesn't work if two iterators are returning the exact same 68 | // keys. However, in reality, two entries will always have differing 69 | // sequence numbers. 70 | if self.iters[i].current(&mut keybuf, &mut valbuf) 71 | && self.cmp.cmp(&keybuf, &key) == Ordering::Equal 72 | { 73 | self.iters[i].advance(); 74 | } 75 | } 76 | } 77 | } 78 | Direction::Reverse if self.direction == Direction::Forward => { 79 | self.direction = Direction::Reverse; 80 | for i in 0..self.iters.len() { 81 | if i != current { 82 | self.iters[i].seek(&key); 83 | if self.iters[i].valid() { 84 | self.iters[i].prev(); 85 | } else { 86 | // seek to last. 
87 | while self.iters[i].advance() {} 88 | } 89 | } 90 | } 91 | } 92 | _ => {} 93 | } 94 | } 95 | } 96 | } 97 | 98 | fn find_smallest(&mut self) { 99 | self.find(SL::Smallest) 100 | } 101 | fn find_largest(&mut self) { 102 | self.find(SL::Largest) 103 | } 104 | 105 | fn find(&mut self, direction: SL) { 106 | if self.iters.is_empty() { 107 | // Iterator stays invalid. 108 | return; 109 | } 110 | 111 | let ord = if direction == SL::Smallest { 112 | Ordering::Less 113 | } else { 114 | Ordering::Greater 115 | }; 116 | 117 | let mut next_ix = 0; 118 | let (mut current, mut smallest, mut valscratch) = (vec![], vec![], vec![]); 119 | 120 | for i in 1..self.iters.len() { 121 | if self.iters[i].current(&mut current, &mut valscratch) { 122 | if self.iters[next_ix].current(&mut smallest, &mut valscratch) { 123 | if self.cmp.cmp(¤t, &smallest) == ord { 124 | next_ix = i; 125 | } 126 | } else { 127 | next_ix = i; 128 | } 129 | } 130 | } 131 | 132 | self.current = Some(next_ix); 133 | } 134 | } 135 | 136 | impl LdbIterator for MergingIter { 137 | fn advance(&mut self) -> bool { 138 | if let Some(current) = self.current { 139 | self.update_direction(Direction::Forward); 140 | if !self.iters[current].advance() { 141 | // Take this iterator out of rotation; this will return false 142 | // for every call to current() and thus it will be ignored 143 | // from here on. 
144 | self.iters[current].reset(); 145 | } 146 | self.find_smallest(); 147 | } else { 148 | self.init(); 149 | } 150 | self.valid() 151 | } 152 | fn valid(&self) -> bool { 153 | if let Some(ix) = self.current { 154 | self.iters[ix].valid() 155 | } else { 156 | false 157 | } 158 | } 159 | fn seek(&mut self, key: &[u8]) { 160 | for i in 0..self.iters.len() { 161 | self.iters[i].seek(key); 162 | } 163 | self.find_smallest(); 164 | } 165 | fn reset(&mut self) { 166 | for i in 0..self.iters.len() { 167 | self.iters[i].reset(); 168 | } 169 | self.current = None; 170 | } 171 | fn current(&self, key: &mut Vec, val: &mut Vec) -> bool { 172 | if let Some(ix) = self.current { 173 | self.iters[ix].current(key, val) 174 | } else { 175 | false 176 | } 177 | } 178 | fn prev(&mut self) -> bool { 179 | if let Some(current) = self.current { 180 | if self.iters[current].valid() { 181 | self.update_direction(Direction::Reverse); 182 | self.iters[current].prev(); 183 | self.find_largest(); 184 | self.valid() 185 | } else { 186 | false 187 | } 188 | } else { 189 | false 190 | } 191 | } 192 | } 193 | 194 | #[cfg(test)] 195 | mod tests { 196 | use super::*; 197 | 198 | use crate::cmp::DefaultCmp; 199 | use crate::skipmap::tests; 200 | use crate::test_util::{test_iterator_properties, LdbIteratorIter, TestLdbIter}; 201 | use crate::types::{current_key_val, LdbIterator}; 202 | 203 | #[test] 204 | fn test_merging_one() { 205 | let skm = tests::make_skipmap(); 206 | let iter = skm.iter(); 207 | let mut iter2 = skm.iter(); 208 | 209 | let mut miter = MergingIter::new(Rc::new(Box::new(DefaultCmp)), vec![Box::new(iter)]); 210 | 211 | while let Some((k, v)) = miter.next() { 212 | if let Some((k2, v2)) = iter2.next() { 213 | assert_eq!(k, k2); 214 | assert_eq!(v, v2); 215 | } else { 216 | panic!("Expected element from iter2"); 217 | } 218 | } 219 | } 220 | 221 | #[test] 222 | fn test_merging_two() { 223 | let skm = tests::make_skipmap(); 224 | let iter = skm.iter(); 225 | let iter2 = skm.iter(); 
226 | 227 | let mut miter = MergingIter::new( 228 | Rc::new(Box::new(DefaultCmp)), 229 | vec![Box::new(iter), Box::new(iter2)], 230 | ); 231 | 232 | while let Some((k, v)) = miter.next() { 233 | if let Some((k2, v2)) = miter.next() { 234 | assert_eq!(k, k2); 235 | assert_eq!(v, v2); 236 | } else { 237 | panic!("Odd number of elements"); 238 | } 239 | } 240 | } 241 | 242 | #[test] 243 | fn test_merging_zero() { 244 | let mut miter = MergingIter::new(Rc::new(Box::new(DefaultCmp)), vec![]); 245 | assert_eq!(0, LdbIteratorIter::wrap(&mut miter).count()); 246 | } 247 | 248 | #[test] 249 | fn test_merging_behavior() { 250 | let val = b"def"; 251 | let iter = TestLdbIter::new(vec![(b("aba"), val), (b("abc"), val)]); 252 | let iter2 = TestLdbIter::new(vec![(b("abb"), val), (b("abd"), val)]); 253 | let miter = MergingIter::new( 254 | Rc::new(Box::new(DefaultCmp)), 255 | vec![Box::new(iter), Box::new(iter2)], 256 | ); 257 | test_iterator_properties(miter); 258 | } 259 | 260 | #[test] 261 | fn test_merging_forward_backward() { 262 | let val = b"def"; 263 | let iter = TestLdbIter::new(vec![(b("aba"), val), (b("abc"), val), (b("abe"), val)]); 264 | let iter2 = TestLdbIter::new(vec![(b("abb"), val), (b("abd"), val)]); 265 | 266 | let mut miter = MergingIter::new( 267 | Rc::new(Box::new(DefaultCmp)), 268 | vec![Box::new(iter), Box::new(iter2)], 269 | ); 270 | 271 | // miter should return the following sequence: [aba, abb, abc, abd, abe] 272 | 273 | // -> aba 274 | let first = miter.next(); 275 | // -> abb 276 | let second = miter.next(); 277 | // -> abc 278 | let third = miter.next(); 279 | eprintln!("{:?} {:?} {:?}", first, second, third); 280 | 281 | assert!(first != third); 282 | // abb <- 283 | assert!(miter.prev()); 284 | assert_eq!(second, current_key_val(&miter)); 285 | // aba <- 286 | assert!(miter.prev()); 287 | assert_eq!(first, current_key_val(&miter)); 288 | // -> abb 289 | assert!(miter.advance()); 290 | assert_eq!(second, current_key_val(&miter)); 291 | // -> abc 
292 | assert!(miter.advance()); 293 | assert_eq!(third, current_key_val(&miter)); 294 | // -> abd 295 | assert!(miter.advance()); 296 | assert_eq!( 297 | Some((b("abd").to_vec(), val.to_vec())), 298 | current_key_val(&miter) 299 | ); 300 | } 301 | 302 | fn b(s: &'static str) -> &'static [u8] { 303 | s.as_bytes() 304 | } 305 | 306 | #[test] 307 | fn test_merging_real() { 308 | let val = b"def"; 309 | 310 | let it1 = TestLdbIter::new(vec![(b("aba"), val), (b("abc"), val), (b("abe"), val)]); 311 | let it2 = TestLdbIter::new(vec![(b("abb"), val), (b("abd"), val)]); 312 | let expected = [b("aba"), b("abb"), b("abc"), b("abd"), b("abe")]; 313 | 314 | let mut iter = MergingIter::new( 315 | Rc::new(Box::new(DefaultCmp)), 316 | vec![Box::new(it1), Box::new(it2)], 317 | ); 318 | 319 | for (i, (k, _)) in LdbIteratorIter::wrap(&mut iter).enumerate() { 320 | assert_eq!(k, expected[i]); 321 | } 322 | } 323 | 324 | #[test] 325 | fn test_merging_seek_reset() { 326 | let val = b"def"; 327 | 328 | let it1 = TestLdbIter::new(vec![(b("aba"), val), (b("abc"), val), (b("abe"), val)]); 329 | let it2 = TestLdbIter::new(vec![(b("abb"), val), (b("abd"), val)]); 330 | 331 | let mut iter = MergingIter::new( 332 | Rc::new(Box::new(DefaultCmp)), 333 | vec![Box::new(it1), Box::new(it2)], 334 | ); 335 | 336 | assert!(!iter.valid()); 337 | iter.advance(); 338 | assert!(iter.valid()); 339 | assert!(current_key_val(&iter).is_some()); 340 | 341 | iter.seek(b"abc"); 342 | assert_eq!( 343 | current_key_val(&iter), 344 | Some((b("abc").to_vec(), val.to_vec())) 345 | ); 346 | iter.seek(b"ab0"); 347 | assert_eq!( 348 | current_key_val(&iter), 349 | Some((b("aba").to_vec(), val.to_vec())) 350 | ); 351 | iter.seek(b"abx"); 352 | assert_eq!(current_key_val(&iter), None); 353 | 354 | iter.reset(); 355 | assert!(!iter.valid()); 356 | iter.next(); 357 | assert_eq!( 358 | current_key_val(&iter), 359 | Some((b("aba").to_vec(), val.to_vec())) 360 | ); 361 | } 362 | } 363 | 
-------------------------------------------------------------------------------- /src/options.rs: -------------------------------------------------------------------------------- 1 | use crate::cmp::{Cmp, DefaultCmp}; 2 | use crate::compressor::{self, Compressor, CompressorId}; 3 | use crate::env::Env; 4 | use crate::infolog::{self, Logger}; 5 | use crate::mem_env::MemEnv; 6 | use crate::types::{share, Shared}; 7 | use crate::Result; 8 | use crate::{filter, Status, StatusCode}; 9 | 10 | use std::default::Default; 11 | use std::rc::Rc; 12 | 13 | const KB: usize = 1 << 10; 14 | const MB: usize = KB * KB; 15 | 16 | const BLOCK_MAX_SIZE: usize = 4 * KB; 17 | const BLOCK_CACHE_CAPACITY: usize = 8 * MB; 18 | const WRITE_BUFFER_SIZE: usize = 4 * MB; 19 | const DEFAULT_BITS_PER_KEY: u32 = 10; // NOTE: This may need to be optimized. 20 | 21 | /// Options contains general parameters for a LevelDB instance. Most of the names are 22 | /// self-explanatory; the defaults are defined in the `Default` implementation. 23 | #[derive(Clone)] 24 | pub struct Options { 25 | pub cmp: Rc>, 26 | pub env: Rc>, 27 | pub log: Option>, 28 | pub create_if_missing: bool, 29 | pub error_if_exists: bool, 30 | pub paranoid_checks: bool, 31 | pub write_buffer_size: usize, 32 | pub max_open_files: usize, 33 | pub max_file_size: usize, 34 | pub block_cache_capacity_bytes: usize, 35 | pub block_size: usize, 36 | pub block_restart_interval: usize, 37 | /// Compressor id in compressor list 38 | /// 39 | /// Note: you have to open a database with the same compression type as it was written to, in 40 | /// order to not lose data! 
(this is a bug and will be fixed) 41 | pub compressor: u8, 42 | 43 | pub compressor_list: Rc, 44 | pub reuse_logs: bool, 45 | pub reuse_manifest: bool, 46 | pub filter_policy: filter::BoxedFilterPolicy, 47 | } 48 | 49 | #[cfg(feature = "fs")] 50 | type DefaultEnv = crate::disk_env::PosixDiskEnv; 51 | 52 | #[cfg(not(feature = "fs"))] 53 | type DefaultEnv = crate::mem_env::MemEnv; 54 | 55 | impl Default for Options { 56 | fn default() -> Options { 57 | Options { 58 | cmp: Rc::new(Box::new(DefaultCmp)), 59 | env: Rc::new(Box::new(DefaultEnv::new())), 60 | log: None, 61 | create_if_missing: true, 62 | error_if_exists: false, 63 | paranoid_checks: false, 64 | write_buffer_size: WRITE_BUFFER_SIZE, 65 | max_open_files: 1 << 10, 66 | max_file_size: 2 << 20, 67 | block_cache_capacity_bytes: BLOCK_MAX_SIZE * 1024, 68 | block_size: BLOCK_MAX_SIZE, 69 | block_restart_interval: 16, 70 | reuse_logs: true, 71 | reuse_manifest: true, 72 | compressor: 0, 73 | compressor_list: Rc::new(CompressorList::default()), 74 | filter_policy: Rc::new(Box::new(filter::BloomPolicy::new(DEFAULT_BITS_PER_KEY))), 75 | } 76 | } 77 | } 78 | 79 | /// Customize compressor method for leveldb 80 | /// 81 | /// `Default` value is like the code below 82 | /// ``` 83 | /// # use rusty_leveldb::{compressor, CompressorList}; 84 | /// let mut list = CompressorList::new(); 85 | /// list.set(compressor::NoneCompressor); 86 | /// list.set(compressor::SnappyCompressor); 87 | /// ``` 88 | pub struct CompressorList([Option>; 256]); 89 | 90 | impl CompressorList { 91 | /// Create a **Empty** compressor list 92 | pub fn new() -> Self { 93 | const INIT: Option> = None; 94 | Self([INIT; 256]) 95 | } 96 | 97 | /// Set compressor with the id in `CompressorId` trait 98 | pub fn set(&mut self, compressor: T) 99 | where 100 | T: Compressor + CompressorId + 'static, 101 | { 102 | self.set_with_id(T::ID, compressor) 103 | } 104 | 105 | /// Set compressor with id 106 | pub fn set_with_id(&mut self, id: u8, compressor: impl 
Compressor + 'static) { 107 | self.0[id as usize] = Some(Box::new(compressor)); 108 | } 109 | 110 | pub fn is_set(&self, id: u8) -> bool { 111 | self.0[id as usize].is_some() 112 | } 113 | 114 | pub fn get(&self, id: u8) -> Result<&dyn Compressor> { 115 | self.0[id as usize] 116 | .as_ref() 117 | .map(AsRef::as_ref) 118 | .ok_or_else(|| Status { 119 | code: StatusCode::NotSupported, 120 | err: format!("invalid compression id `{}`", id), 121 | }) 122 | } 123 | } 124 | 125 | impl Default for CompressorList { 126 | fn default() -> Self { 127 | let mut list = Self::new(); 128 | list.set(compressor::NoneCompressor); 129 | list.set(compressor::SnappyCompressor); 130 | list 131 | } 132 | } 133 | 134 | /// Returns Options that will cause a database to exist purely in-memory instead of being stored on 135 | /// disk. This is useful for testing or ephemeral databases. 136 | pub fn in_memory() -> Options { 137 | Options { 138 | env: Rc::new(Box::new(MemEnv::new())), 139 | ..Options::default() 140 | } 141 | } 142 | 143 | pub fn for_test() -> Options { 144 | Options { 145 | env: Rc::new(Box::new(MemEnv::new())), 146 | log: Some(share(infolog::stderr())), 147 | ..Options::default() 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/snapshot.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::types::{share, SequenceNumber, Shared, MAX_SEQUENCE_NUMBER}; 4 | 5 | use std::rc::Rc; 6 | 7 | /// Opaque snapshot handle; Represents index to SnapshotList.map 8 | type SnapshotHandle = u64; 9 | 10 | /// An InnerSnapshot is shared by several Snapshots. This enables cloning snapshots, and a snapshot 11 | /// is released once the last instance is dropped. 
12 | #[derive(Clone)] 13 | struct InnerSnapshot { 14 | id: SnapshotHandle, 15 | seq: SequenceNumber, 16 | sl: Shared, 17 | } 18 | 19 | impl Drop for InnerSnapshot { 20 | fn drop(&mut self) { 21 | self.sl.borrow_mut().delete(self.id); 22 | } 23 | } 24 | 25 | #[derive(Clone)] 26 | pub struct Snapshot { 27 | inner: Rc, 28 | } 29 | 30 | impl Snapshot { 31 | pub fn sequence(&self) -> SequenceNumber { 32 | self.inner.seq 33 | } 34 | } 35 | 36 | /// A list of all snapshots is kept in the DB. 37 | struct InnerSnapshotList { 38 | map: HashMap, 39 | newest: SnapshotHandle, 40 | oldest: SnapshotHandle, 41 | } 42 | 43 | pub struct SnapshotList { 44 | inner: Shared, 45 | } 46 | 47 | impl SnapshotList { 48 | pub fn new() -> SnapshotList { 49 | SnapshotList { 50 | inner: share(InnerSnapshotList { 51 | map: HashMap::new(), 52 | newest: 0, 53 | oldest: 0, 54 | }), 55 | } 56 | } 57 | 58 | pub fn new_snapshot(&mut self, seq: SequenceNumber) -> Snapshot { 59 | let inner = self.inner.clone(); 60 | let mut sl = self.inner.borrow_mut(); 61 | 62 | sl.newest += 1; 63 | let newest = sl.newest; 64 | sl.map.insert(newest, seq); 65 | 66 | if sl.oldest == 0 { 67 | sl.oldest = sl.newest; 68 | } 69 | 70 | Snapshot { 71 | inner: Rc::new(InnerSnapshot { 72 | id: sl.newest, 73 | seq, 74 | sl: inner, 75 | }), 76 | } 77 | } 78 | 79 | /// oldest returns the lowest sequence number of all snapshots. It returns 0 if no snapshots 80 | /// are present. 81 | pub fn oldest(&self) -> SequenceNumber { 82 | let oldest = self 83 | .inner 84 | .borrow() 85 | .map 86 | .iter() 87 | .fold( 88 | MAX_SEQUENCE_NUMBER, 89 | |s, (seq, _)| if *seq < s { *seq } else { s }, 90 | ); 91 | if oldest == MAX_SEQUENCE_NUMBER { 92 | 0 93 | } else { 94 | oldest 95 | } 96 | } 97 | 98 | /// newest returns the newest sequence number of all snapshots. If no snapshots are present, it 99 | /// returns 0. 
100 | pub fn newest(&self) -> SequenceNumber { 101 | self.inner 102 | .borrow() 103 | .map 104 | .iter() 105 | .fold(0, |s, (seq, _)| if *seq > s { *seq } else { s }) 106 | } 107 | 108 | pub fn empty(&self) -> bool { 109 | self.inner.borrow().oldest == 0 110 | } 111 | } 112 | 113 | impl InnerSnapshotList { 114 | fn delete(&mut self, id: SnapshotHandle) { 115 | self.map.remove(&id); 116 | } 117 | } 118 | 119 | #[cfg(test)] 120 | mod tests { 121 | use super::*; 122 | 123 | #[allow(unused_variables)] 124 | #[test] 125 | fn test_snapshot_list() { 126 | let mut l = SnapshotList::new(); 127 | 128 | { 129 | assert!(l.empty()); 130 | let a = l.new_snapshot(1); 131 | 132 | { 133 | let b = l.new_snapshot(2); 134 | 135 | { 136 | let c = l.new_snapshot(3); 137 | 138 | assert!(!l.empty()); 139 | assert_eq!(l.oldest(), 1); 140 | assert_eq!(l.newest(), 3); 141 | } 142 | 143 | assert_eq!(l.newest(), 2); 144 | assert_eq!(l.oldest(), 1); 145 | } 146 | 147 | assert_eq!(l.oldest(), 1); 148 | } 149 | assert_eq!(l.oldest(), 0); 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/table_block.rs: -------------------------------------------------------------------------------- 1 | use crate::block::Block; 2 | use crate::blockhandle::BlockHandle; 3 | use crate::crc; 4 | use crate::env::RandomAccess; 5 | use crate::error::{err, Result, StatusCode}; 6 | use crate::filter; 7 | use crate::filter_block::FilterBlockReader; 8 | use crate::log::unmask_crc; 9 | use crate::options::Options; 10 | use crate::table_builder; 11 | 12 | use integer_encoding::FixedInt; 13 | 14 | /// Reads the data for the specified block handle from a file. 15 | fn read_bytes(f: &dyn RandomAccess, location: &BlockHandle) -> Result> { 16 | let mut buf = vec![0; location.size()]; 17 | f.read_at(location.offset(), &mut buf).map(|_| buf) 18 | } 19 | 20 | /// Reads a serialized filter block from a file and returns a FilterBlockReader. 
21 | pub fn read_filter_block( 22 | src: &dyn RandomAccess, 23 | location: &BlockHandle, 24 | policy: filter::BoxedFilterPolicy, 25 | ) -> Result { 26 | if location.size() == 0 { 27 | return err( 28 | StatusCode::InvalidArgument, 29 | "no filter block in empty location", 30 | ); 31 | } 32 | let buf = read_bytes(src, location)?; 33 | Ok(FilterBlockReader::new_owned(policy, buf)) 34 | } 35 | 36 | /// Reads a table block from a random-access source. 37 | /// A table block consists of [bytes..., compress (1B), checksum (4B)]; the handle only refers to 38 | /// the location and length of [bytes...]. 39 | pub fn read_table_block( 40 | opt: Options, 41 | f: &dyn RandomAccess, 42 | location: &BlockHandle, 43 | ) -> Result { 44 | // The block is denoted by offset and length in BlockHandle. A block in an encoded 45 | // table is followed by 1B compression type and 4B checksum. 46 | // The checksum refers to the compressed contents. 47 | let buf = read_bytes(f, location)?; 48 | let compress = read_bytes( 49 | f, 50 | &BlockHandle::new( 51 | location.offset() + location.size(), 52 | table_builder::TABLE_BLOCK_COMPRESS_LEN, 53 | ), 54 | )?; 55 | let cksum = read_bytes( 56 | f, 57 | &BlockHandle::new( 58 | location.offset() + location.size() + table_builder::TABLE_BLOCK_COMPRESS_LEN, 59 | table_builder::TABLE_BLOCK_CKSUM_LEN, 60 | ), 61 | )?; 62 | 63 | if !verify_table_block(&buf, compress[0], unmask_crc(u32::decode_fixed(&cksum))) { 64 | return err( 65 | StatusCode::Corruption, 66 | &format!( 67 | "checksum verification failed for block at {}", 68 | location.offset() 69 | ), 70 | ); 71 | } 72 | let compressor_list = opt.compressor_list.clone(); 73 | 74 | Ok(Block::new( 75 | opt, 76 | compressor_list.get(compress[0])?.decode(buf)?, 77 | )) 78 | } 79 | 80 | /// Verify checksum of block 81 | fn verify_table_block(data: &[u8], compression: u8, want: u32) -> bool { 82 | let mut digest = crc::digest(); 83 | digest.update(data); 84 | digest.update(&[compression; 1]); 85 | 
digest.finalize() == want 86 | } 87 | -------------------------------------------------------------------------------- /src/table_builder.rs: -------------------------------------------------------------------------------- 1 | use crate::block::BlockContents; 2 | use crate::block_builder::BlockBuilder; 3 | use crate::blockhandle::BlockHandle; 4 | use crate::cmp::InternalKeyCmp; 5 | use crate::compressor::{self, Compressor, CompressorId}; 6 | use crate::crc; 7 | use crate::error::Result; 8 | use crate::filter::{InternalFilterPolicy, NoFilterPolicy}; 9 | use crate::filter_block::FilterBlockBuilder; 10 | use crate::key_types::InternalKey; 11 | use crate::log::mask_crc; 12 | use crate::options::Options; 13 | 14 | use std::cmp::Ordering; 15 | use std::io::Write; 16 | use std::rc::Rc; 17 | 18 | use integer_encoding::FixedIntWriter; 19 | 20 | pub const FOOTER_LENGTH: usize = 40; 21 | pub const FULL_FOOTER_LENGTH: usize = FOOTER_LENGTH + 8; 22 | pub const MAGIC_FOOTER_NUMBER: u64 = 0xdb4775248b80fb57; 23 | pub const MAGIC_FOOTER_ENCODED: [u8; 8] = [0x57, 0xfb, 0x80, 0x8b, 0x24, 0x75, 0x47, 0xdb]; 24 | 25 | pub const TABLE_BLOCK_COMPRESS_LEN: usize = 1; 26 | pub const TABLE_BLOCK_CKSUM_LEN: usize = 4; 27 | 28 | /// Footer is a helper for encoding/decoding a table footer. 29 | #[derive(Debug, Clone)] 30 | pub struct Footer { 31 | pub meta_index: BlockHandle, 32 | pub index: BlockHandle, 33 | } 34 | 35 | /// A Table footer contains a pointer to the metaindex block, another pointer to the index block, 36 | /// and a magic number: 37 | /// [ { table data ... , METAINDEX blockhandle, INDEX blockhandle, PADDING bytes } = 40 bytes, 38 | /// MAGIC_FOOTER_ENCODED ] 39 | impl Footer { 40 | pub fn new(metaix: BlockHandle, index: BlockHandle) -> Footer { 41 | Footer { 42 | meta_index: metaix, 43 | index, 44 | } 45 | } 46 | 47 | pub fn decode(from: &[u8]) -> Option