├── .gitignore ├── AUTHORS ├── Cargo.toml ├── LICENSE ├── Makefile ├── README.md ├── RELEASES.md ├── TODO.md ├── benches ├── croaring_bench.rs ├── gate_bench.rs ├── hasher_bench.rs ├── jealloc_bench.rs ├── std_benches.rs └── util_benches.rs ├── docs ├── PughConcurrentSkiplist.pdf ├── PughSkiplistPaper.pdf ├── RBTreesKim.pdf ├── Sedgewick-LLRB.pdf ├── Sedgewick-Talk-Penn2008.pdf ├── git2.md ├── osdi14-paper-zheng_mai.pdf ├── reference.md ├── serialize_versions.md └── snapshots.md ├── perf.sh ├── quick_push.sh ├── run_nightly.sh ├── rustfmt.toml └── src ├── _archive ├── backup.rs ├── core.rs ├── core_test.rs ├── dgm.rs ├── dgm_test.rs ├── dlog.rs ├── dlog_entry.rs ├── dlog_entry_test.rs ├── dlog_journal.rs ├── dlog_journal_test.rs ├── dlog_marker.rs ├── entry.rs ├── fs2_test.rs ├── llrb.rs ├── llrb_common.rs ├── llrb_node.rs ├── llrb_test.rs ├── lsm.rs ├── lsm_test.rs ├── mvcc.rs ├── mvcc_test.rs ├── nodisk.rs ├── panic.rs ├── raft_log.rs ├── raft_log_test.rs ├── rdms.rs ├── ref_test.rs ├── robt.rs ├── robt_entry.rs ├── robt_entry_test.rs ├── robt_index.rs ├── robt_index_test.rs ├── robt_marker.rs ├── robt_test.rs ├── scans.rs ├── scans_test.rs ├── shllrb.rs ├── shllrb_test.rs ├── shrobt.rs ├── shrobt_test.rs ├── spinlock.rs ├── spinlock_test.rs ├── sync.rs ├── sync_writer.rs ├── sync_writer_test.rs ├── thread.rs ├── types.rs ├── types_test.rs ├── util.rs ├── util_test.rs ├── vlog.rs ├── vlog_test.rs ├── wal.rs └── wal_test.rs ├── bin ├── crio │ ├── cmd_fetch.rs │ ├── main.rs │ ├── profile.toml │ └── types.rs ├── diskio │ ├── main.rs │ ├── plot.rs │ └── stats.rs ├── html │ └── main.rs ├── nltk │ └── main.rs ├── pms │ ├── cmd_clone.rs │ ├── cmd_excluded.rs │ ├── cmd_status.rs │ ├── config.rs │ ├── h.rs │ └── main.rs ├── rdms │ ├── cmd_git.rs │ ├── cmd_perf.rs │ ├── latency.rs │ ├── main.rs │ ├── perf-profiles │ │ ├── default-btree-map.toml │ │ ├── default-llrb.toml │ │ ├── default-lmdb.toml │ │ ├── default-robt.toml │ │ └── default-wral.toml │ ├── perf_btree_map.rs │ 
├── perf_llrb.rs │ ├── perf_lmdb.rs │ ├── perf_robt.rs │ └── perf_wral.rs └── zimf │ ├── main.rs │ └── print.rs ├── bitmaps ├── croaring.rs ├── croaring_test.rs ├── fuse16.rs ├── fuse16_test.rs ├── fuse8.rs ├── fuse8_test.rs ├── mod.rs ├── nobitmap.rs ├── xor8.rs └── xor8_test.rs ├── clru ├── README.md ├── access.rs ├── evictor.rs ├── lru.rs ├── lru_test.rs └── mod.rs ├── dba ├── entry.rs ├── git.rs ├── mod.rs └── types.rs ├── dbs ├── binary.rs ├── compact.rs ├── delta.rs ├── delta_test.rs ├── diff.rs ├── diff_test.rs ├── entry.rs ├── entry_test.rs ├── mod.rs ├── types.rs ├── types_test.rs ├── value.rs ├── value_test.rs └── wop.rs ├── error.rs ├── git ├── config.rs ├── default.toml ├── index.rs ├── mod.rs ├── repo.rs └── trie.rs ├── hash └── mod.rs ├── index.rs ├── lib.rs ├── llrb ├── depth.rs ├── depth_test.rs ├── index.rs ├── index_test.rs ├── mod.rs ├── node.rs ├── node_test.rs └── stats.rs ├── lsm ├── lsm.rs └── lsm_test.rs ├── mq ├── filter.rs ├── filter_map.rs ├── map.rs ├── mod.rs ├── reduce.rs ├── sink.rs ├── source.rs └── split.rs ├── parsec ├── lex.rs ├── mod.rs └── parse.rs ├── robt ├── build.rs ├── config.rs ├── entry.rs ├── entry_test.rs ├── files.rs ├── files_test.rs ├── flush.rs ├── flush_test.rs ├── index.rs ├── index_test.rs ├── lsm.rs ├── marker.rs ├── mod.rs ├── reader.rs ├── scans.rs ├── scans_test.rs ├── vlog.rs └── vlog_test.rs ├── sys ├── mod.rs ├── system.rs └── system_test.rs ├── trie ├── mod.rs ├── trie.rs └── trie_test.rs ├── util ├── cmdline.rs ├── files.rs ├── files_test.rs ├── mod.rs ├── mod_test.rs ├── print.rs ├── spinlock.rs ├── spinlock_test.rs └── thread.rs ├── web ├── html.rs ├── html_test.rs ├── mod.rs └── selector.rs ├── wral ├── batch.rs ├── batch_test.rs ├── entry.rs ├── entry_test.rs ├── files.rs ├── journal.rs ├── journal_test.rs ├── journals.rs ├── mod.rs ├── state.rs ├── wal.rs └── wal_test.rs └── zimf ├── mod.rs ├── workers.rs └── zim.rs /.gitignore: 
-------------------------------------------------------------------------------- 1 | **/*.swp 2 | */*.swo 3 | */out 4 | target 5 | Cargo.lock 6 | .vimsession 7 | .vim_session 8 | core 9 | perf-data/ 10 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the official list of `rdms` Authors for copyright purpose. 2 | 3 | prataprc 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rdms" 3 | version = "0.0.1" 4 | authors = ["prataprc "] 5 | description = "Composable storage for key,value index" 6 | documentation = "https://docs.rs/rdms" 7 | homepage = "https://github.com/bnclabs/rdms" 8 | repository = "https://github.com/bnclabs/rdms" 9 | keywords = ["storage", "mvcc", "llrb", "btree", "lsm"] 10 | categories = ["data-structures", "database"] 11 | license = "AGPL-3.0" 12 | exclude = ["template/**", "docs/**"] 13 | readme = "README.md" 14 | edition = "2018" 15 | 16 | [lib] 17 | name = "rdms" 18 | path = "src/lib.rs" 19 | 20 | [[bin]] 21 | name = "rdms" 22 | required-features = ["rdms"] 23 | 24 | [[bin]] 25 | name = "pms" 26 | required-features = ["pms"] 27 | 28 | [[bin]] 29 | name = "crio" 30 | required-features = ["crio"] 31 | 32 | [[bin]] 33 | name = "zimf" 34 | required-features = ["zimf"] 35 | 36 | [[bin]] 37 | name = "html" 38 | required-features = ["html"] 39 | 40 | [[bin]] 41 | name = "nltk" 42 | required-features = ["nltk"] 43 | 44 | [[bin]] 45 | name = "diskio" 46 | required-features = ["diskio"] 47 | 48 | [badges] 49 | maintenance = { status = "actively-developed" } 50 | travis-ci = { repository = "bnclabs/rdms" } 51 | 52 | [dependencies] 53 | xorfilter-rs = { path = "../xorfilter", features = ["cbordata"] } 54 | cbordata = { version = "0.6.0" } 55 | cmap = { path = "../cmap", 
version = "0.3.0" } 56 | arbitrary = { version = "1", features = ["derive"] } 57 | toml = "0.5.8" 58 | lazy_static = "1.4.0" 59 | fs2 = "0.4.3" 60 | croaring = "0.5.1" 61 | cityhash-rs = "1.0.0" 62 | git2 = "0.13.20" 63 | serde = { version = "1", features = ["derive"] } 64 | chrono = "0.4.19" 65 | binread = "2.2.0" 66 | uuid = "0.8.2" 67 | rand = { version = "0.8.4", features = ["std_rng"] } 68 | xz2 = "0.1.6" 69 | zstd = "0.9.0" 70 | regex = "1.5.4" 71 | num_cpus = "1.13.1" 72 | rayon = "1.5.1" 73 | dirs = "4.0.0" 74 | sys-info = "0.9.1" 75 | url = { version = "2.2.2", features = ["serde"] } 76 | 77 | structopt = { version = "0.3.20", default-features = false, optional = true } 78 | lmdb = { version = "0.8.0", optional = true } 79 | csv = { version = "1.1", optional = true } 80 | ureq = { version = "2.3", optional = true } 81 | tar = { version = "0.4", optional = true } 82 | flate2 = { version = "1.0", optional = true } 83 | sha-1 = { version = "0.9.8", optional = true } 84 | serde_json = { version = "1.0.70", optional = true } 85 | scraper = { version = "0.12.0", optional = true } 86 | colored = { version = "2.0.0", optional = true } 87 | prettytable-rs = { version = "0.8.0", optional = true } 88 | plotters = { version = "0.3.1", optional = true } 89 | # jsondata = { path = "../jsondata", optional = true } 90 | 91 | # log = "0.4.8" 92 | # memmap = "0.7.0" 93 | 94 | [target.'cfg(not(target_env = "msvc"))'.dependencies] 95 | jemallocator = "0.3.2" 96 | 97 | [dev-dependencies] 98 | crc = "2.1.0" 99 | ppom = { path = "../ppom", version = "0.7.0", features = ["rand"] } 100 | 101 | [features] 102 | rdms = ["structopt", "prettytable-rs", "lmdb", "sha-1"] 103 | pms = ["structopt", "prettytable-rs", "colored"] 104 | crio = ["structopt", "prettytable-rs", "csv", "ureq", "tar", "flate2", "serde_json"] 105 | zimf = ["structopt", "prettytable-rs"] 106 | html = ["structopt", "prettytable-rs"] 107 | nltk = ["structopt", "prettytable-rs"] 108 | diskio = ["structopt", "plotters"] 
109 | debug = [] 110 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Package not ready for stable. 2 | 3 | build: 4 | # ... build ... 5 | # TODO: cargo +stable build 6 | cargo +nightly build 7 | # 8 | # ... test ... 9 | # TODO: cargo +stable test --no-run 10 | cargo +nightly test --no-run 11 | # 12 | # ... bench ... 13 | cargo +nightly bench --no-run 14 | # 15 | # ... doc ... 16 | # TODO: cargo +stable doc 17 | cargo +nightly doc 18 | # 19 | # ... bins ... 20 | # cargo +stable build --release --bin rdms --features=rdms 21 | cargo +nightly build --release --bin rdms --features=rdms 22 | # cargo +stable build --release --bin pms --features=pms 23 | cargo +nightly build --release --bin pms --features=pms 24 | # cargo +stable build --release --bin crio --features=crio 25 | cargo +nightly build --release --bin crio --features=crio 26 | # cargo +stable build --release --bin zimf --features=zimf 27 | cargo +nightly build --release --bin zimf --features=zimf 28 | # 29 | # ... meta commands ... 30 | cargo +nightly clippy --all-targets --all-features 31 | 32 | test: 33 | # ... test ... 34 | # TODO: cargo +stable test 35 | cargo +nightly test 36 | 37 | bench: 38 | # ... bench ... 
39 | # TODO: cargo +stable bench 40 | cargo +nightly bench 41 | 42 | flamegraph: 43 | cargo flamegraph --features=rdms --release --bin=rdms -- --load 1000000 --ops 10000 44 | 45 | prepare: build test bench 46 | check.sh check.out 47 | perf.sh perf.out 48 | 49 | clean: 50 | cargo clean 51 | rm -f check.out perf.out flamegraph.svg perf.data perf.data.old 52 | rm -rf perf-data 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Key Value store 2 | =============== 3 | 4 | [![License: AGPL v3](https://img.shields.io/badge/License-AGPL%20v3-blue.svg)](https://www.gnu.org/licenses/agpl-3.0) 5 | 6 | * [ ] CRUD support. 7 | * [ ] ACID compliance. 8 | * [ ] Index held only in memory, useful for caching data. 9 | * [ ] Index held in memory, with a disk backup. 10 | * [ ] Index held in disk. 11 | * [ ] Index held in disk, with working set held in memory. 12 | * [ ] Durability guarantee using Write Ahead Logging. 13 | * [ ] LSM based Multi-level storage on memory and/or disks. 14 | * [ ] Index can be composed using: 15 | * [ ] Type choice of key. 16 | * [ ] Type choice of value. 17 | * [ ] Type choice of memory data-structure. Type can be: 18 | * [ ] Left leaning red black tree. 19 | * [ ] Left leaning red black tree, with Multi-version-concurrency-control. 20 | * [ ] Skip list, with concurrent writers. 21 | * [ ] Type choice of disk data-structure. 22 | * [ ] Read only Btree. 23 | * [ ] Append only Btree. 24 | * [ ] Centralised version control for index entries. 25 | * [ ] Decentralised version control for index entries. 26 | * [ ] Value, along with delta, can be stored in separate log files. 
27 | 28 | Milestones: 29 | 30 | * [1st-Milestone](https://github.com/bnclabs/rdms/issues/9) 31 | * [2nd-Milestone](https://github.com/bnclabs/rdms/issues/10) 32 | 33 | [memory-ordering]: https://doc.rust-lang.org/std/sync/atomic/enum.Ordering.html 34 | -------------------------------------------------------------------------------- /RELEASES.md: -------------------------------------------------------------------------------- 1 | 0.1.0 2 | ===== 3 | 4 | Code Review checklist 5 | ===================== 6 | 7 | * [ ] Check and confirm dead-code. 8 | * [ ] Check and confirm ignored test cases. 9 | * [ ] Check for un-necessary trait constraints like Debug and Display. 10 | * [ ] Review and check for un-necessary copy, and allocations. 11 | * [ ] Review resize calls on `Vec`. 12 | * [ ] Review (as ...) type casting, to panic on data loss. 13 | * [ ] Reduce trait constraints for Type parameters on public APIs. 14 | * [ ] Public APIs can be as generic as possible. Check whether there 15 | is a scope for `AsRef` or `Borrow` constraints. 16 | * [ ] Review/Document error variants, capture useful information in error msg. 17 | * [ ] Check for dangling links in rustdoc. 18 | * [ ] 80-column width. 19 | * [ ] Copyright and License notice. 20 | * [ ] Make sure that generated artifact is debuggable. Like, 21 | * [ ] RUSTFLAGS=-g 22 | * [ ] Verify panic!() macro, try to replace them with Err(Error). 23 | * [ ] Verify unreachable!() macro, try to replace them with Err(Error). 24 | * [ ] Avoid println!() macro in production code. 25 | * [ ] Review TODO comments in code. 26 | * [ ] Validate the usage of: 27 | * [ ] unwrap() calls. 28 | * [ ] ok() calls on Result/Option types. 29 | * [ ] unsafe { .. } blocks. 30 | * [ ] panic!(), unimplemented!(), unreachable!(), assert!() macros. 31 | * [ ] Trim trait constraints for exported types, exported functions and 32 | type/methods/functions defined in core.rs 33 | 34 | 35 | * Avoid panic!() in inner-level functions. 
If at all it is used, use 36 | them at the API level. 37 | 38 | Release Checklist 39 | ================= 40 | 41 | * Bump up the version: 42 | * __major__: backward incompatible API changes. 43 | * __minor__: backward compatible API Changes. 44 | * __patch__: bug fixes. 45 | * Travis-CI integration. 46 | * Cargo checklist 47 | * cargo +stable build; cargo +nightly build 48 | * cargo +stable doc 49 | * cargo +nightly clippy --all-targets --all-features 50 | * cargo +nightly test 51 | * cargo +nightly bench 52 | * cargo +nightly benchcmp 53 | * cargo fix --edition --all-targets 54 | * Cargo test `ixtest` and `ixperf` 55 | * Create a git-tag for the new version. 56 | * Cargo publish the new version. 57 | * Badges 58 | * Build passing, Travis continuous integration. 59 | * Code coverage, codecov and coveralls. 60 | * Crates badge 61 | * Downloads badge 62 | * License badge 63 | * Rust version badge. 64 | * Maintenance-related badges based on isitmaintained.com 65 | * Documentation 66 | * Gitpitch 67 | * Targets 68 | * RHEL 69 | * SUSE 70 | * Debian 71 | * Centos 72 | * Ubuntu 73 | * Mac-OS 74 | * Windows 75 | * amazon-aws 76 | * Raspberry-pi 77 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | * cargo stuff 2 | * spell checker rust documentation and other md files. 3 | * md-book 4 | * release management 5 | * configuration via features 6 | * platform binary generation 7 | * publishing onto crates.io 8 | * package dependencies 9 | * declared but unused dependencies 10 | * outdated dependencies, upgrades 11 | * licensing analysis 12 | * source code analysis 13 | * modulewise, imports. 14 | * list of types (type, struct, enum, const, static) 15 | * list of functions, traits, trait implementation 16 | * type methods, public and private. 17 | * featured gated source code items. 
18 | 19 | * README: create document links in a separate section, linking to docs/ directory. 20 | * implement logging using log facade. 21 | * benches: Fix benches/ and add it as part of perf.sh 22 | * rdms-perf: add performance suite for dgm, robt, wral, shllrb, shrobt, croaring 23 | * rdms-perf: add latency measurements. 24 | * rdms-perf: plot graphs. 25 | * rdms-test: migrate test-suites from ixtest 26 | 27 | * rdms-perf for robt 28 | * try initial build with 1M, 10, 100M entries; with value as Binary(1K) 29 | * try with nobitmap, xor8 bitmaps. 30 | * try incremental build with 1M, 10, 100M entries; with value as Binary(1K) 31 | * try the incremental builds with and without compaction 32 | * measure concurrent read performance for 1, 2, 4, 8, 16 threads 33 | * try with and without lru cache. 34 | 35 | * wral: journal-limit, adjust the algorithm to not to exceed the journal limit. 36 | 37 | (a) review 5c71164f6d9e57ce60ed0030f1fa7dba7d5056b5 38 | fix errors before refactoring llrb out into ppom 39 | 40 | rdms-clru: 41 | 42 | Concurrent access to least-recently-used-cache need its backing datastructure 43 | like a disk-btree to be immutable. Otherwise, we may have to deal with 44 | synchronization problem in building the cache and evicting the entries. 45 | 46 | Access-1: 47 | Get(cache) fail 48 | Get(disk-btree) 49 | Set(cache) 50 | 51 | Access-2: 52 | Set(disk-btree) 53 | Set(cache) 54 | 55 | Access-3: 56 | Remove(disk-btree) 57 | Remove(cache) 58 | 59 | Access-4: 60 | Evict(cache) 61 | 62 | There will synchronization issues when above listed access scenarios happen 63 | concurrently on the disk-btree and the cache. 
64 | -------------------------------------------------------------------------------- /benches/croaring_bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate test; 3 | 4 | use croaring::bitmap::Bitmap; 5 | use rand::prelude::random; 6 | use test::Bencher; 7 | 8 | use std::time; 9 | 10 | #[bench] 11 | fn bench_croaring_add(b: &mut Bencher) { 12 | let mut bmap = Bitmap::create(); 13 | let mut n = 1; 14 | b.iter(|| { 15 | bmap.add(n); 16 | n += 1 17 | }); 18 | } 19 | 20 | #[bench] 21 | fn bench_croaring_contains(b: &mut Bencher) { 22 | let mut bmap = Bitmap::create(); 23 | let start = time::SystemTime::now(); 24 | let count = 100_000_000; 25 | for _i in 0..count { 26 | let n: u32 = random(); 27 | bmap.add(n) 28 | } 29 | let elapsed = start.elapsed().unwrap().as_nanos(); 30 | println!( 31 | "elapsed {} to add {} items, footprint={}", 32 | elapsed, 33 | count, 34 | bmap.cardinality() 35 | ); 36 | 37 | let mut n = 1; 38 | b.iter(|| { 39 | bmap.contains(1000); 40 | n += 1 41 | }); 42 | } 43 | -------------------------------------------------------------------------------- /benches/gate_bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate test; 3 | 4 | use test::Bencher; 5 | 6 | use rdms::util::spinlock::Spinlock; 7 | 8 | #[bench] 9 | fn bench_spinlock_read(b: &mut Bencher) { 10 | let g = Spinlock::new(0); 11 | b.iter(|| g.read()); 12 | } 13 | 14 | #[bench] 15 | fn bench_spinlock_write(b: &mut Bencher) { 16 | let g = Spinlock::new(0); 17 | b.iter(|| g.write()); 18 | } 19 | -------------------------------------------------------------------------------- /benches/hasher_bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate test; 3 | 4 | use test::Bencher; 5 | 6 | #[bench] 7 | fn bench_default_hasher(b: &mut Bencher) { 8 | use 
std::{collections::hash_map::DefaultHasher, hash::Hasher}; 9 | 10 | let mut n: u64 = 1; 11 | let mut sum = 0; 12 | b.iter(|| { 13 | let mut hasher = DefaultHasher::new(); 14 | hasher.write(&n.to_be_bytes()); 15 | sum += hasher.finish(); 16 | n += 1 17 | }); 18 | } 19 | 20 | #[bench] 21 | fn bench_crc32_hasher(b: &mut Bencher) { 22 | let mut n: u64 = 1; 23 | let mut sum = 0; 24 | let val = crc::Crc::::new(&crc::CRC_32_CKSUM); 25 | b.iter(|| { 26 | sum += val.checksum(&n.to_be_bytes()); 27 | n += 1; 28 | }); 29 | } 30 | -------------------------------------------------------------------------------- /benches/jealloc_bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate test; 3 | 4 | use test::Bencher; 5 | 6 | use std::{ 7 | alloc::{GlobalAlloc, Layout}, 8 | mem, 9 | sync::{mpsc, Arc}, 10 | thread, 11 | time::SystemTime, 12 | }; 13 | 14 | struct Node { 15 | _field1: [u8; 12], 16 | _field2: [u8; 24], 17 | _field3: [u8; 48], 18 | _field4: [u8; 20], 19 | } 20 | 21 | #[bench] 22 | fn bench_je_alloc(b: &mut Bencher) { 23 | let je = jemallocator::Jemalloc; 24 | 25 | let start = SystemTime::now(); 26 | let n = 1_000_000; 27 | for _i in 0..n { 28 | unsafe { je.alloc(Layout::new::()) }; 29 | } 30 | println!("took {:?} to allocate {} blocks", start.elapsed().unwrap(), n); 31 | 32 | b.iter(|| unsafe { je.alloc(Layout::new::()) }); 33 | } 34 | 35 | #[bench] 36 | fn bench_je_alloc_free(b: &mut Bencher) { 37 | let je = jemallocator::Jemalloc; 38 | let start = SystemTime::now(); 39 | let n = 1_000_000; 40 | for _i in 0..n { 41 | unsafe { 42 | let lt = Layout::new::(); 43 | let ptr = je.alloc(lt); 44 | je.dealloc(ptr, lt); 45 | } 46 | } 47 | println!("took {:?} to allocate/free {} blocks", start.elapsed().unwrap(), n); 48 | 49 | b.iter(|| unsafe { 50 | let lt = Layout::new::(); 51 | let ptr = je.alloc(lt); 52 | je.dealloc(ptr, lt); 53 | }); 54 | } 55 | 56 | #[bench] 57 | fn bench_je_alloc_cc2(b: &mut Bencher) 
{ 58 | let je_tx = Arc::new(jemallocator::Jemalloc); 59 | let je_rx = Arc::clone(&je_tx); 60 | 61 | let (tx, rx) = mpsc::channel(); 62 | let handle = thread::spawn(move || { 63 | let lt = Layout::new::(); 64 | for ptr in rx { 65 | let ptr = Box::leak(ptr); 66 | unsafe { je_rx.dealloc(ptr, lt) }; 67 | } 68 | }); 69 | 70 | let start = SystemTime::now(); 71 | let n = 1_000_000; 72 | for _i in 0..n { 73 | unsafe { 74 | let lt = Layout::new::(); 75 | tx.send(Box::from_raw(je_tx.alloc(lt))).unwrap(); 76 | } 77 | } 78 | mem::drop(tx); 79 | 80 | println!("took {:?} to allocate {} blocks", start.elapsed().unwrap(), n); 81 | handle.join().unwrap(); 82 | println!("took {:?} to free {} blocks", start.elapsed().unwrap(), n); 83 | 84 | let je = jemallocator::Jemalloc; 85 | b.iter(|| unsafe { 86 | let lt = Layout::new::(); 87 | let ptr = je.alloc(lt); 88 | je.dealloc(ptr, lt); 89 | }); 90 | } 91 | -------------------------------------------------------------------------------- /benches/std_benches.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate test; 3 | 4 | use std::time::SystemTime; 5 | use test::Bencher; 6 | 7 | #[bench] 8 | fn bench_systemtime_now(b: &mut Bencher) { 9 | b.iter(SystemTime::now); 10 | } 11 | 12 | #[bench] 13 | fn bench_systemtime_elapsed(b: &mut Bencher) { 14 | let now = SystemTime::now(); 15 | b.iter(|| now.elapsed()); 16 | } 17 | -------------------------------------------------------------------------------- /benches/util_benches.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | extern crate test; 3 | 4 | use rand::prelude::random; 5 | use test::Bencher; 6 | 7 | #[bench] 8 | fn bench_1_kb(b: &mut Bencher) { 9 | let src: Vec = (0..1024).map(|x| x as u8).collect(); 10 | let mut dst: Vec = Vec::with_capacity(src.len()); 11 | dst.resize(dst.capacity(), 0); 12 | b.iter(|| dst.copy_from_slice(&src)); 13 | } 14 | 15 | #[bench] 16 
| fn bench_1_mb(b: &mut Bencher) { 17 | let src: Vec = (0..(1024 * 1024)).map(|x| x as u8).collect(); 18 | let mut dst: Vec = Vec::with_capacity(src.len()); 19 | dst.resize(dst.capacity(), 0); 20 | b.iter(|| dst.copy_from_slice(&src)); 21 | } 22 | 23 | #[bench] 24 | fn bench_random_64(b: &mut Bencher) { 25 | b.iter(|| busy_loop(1000)); 26 | } 27 | 28 | #[bench] 29 | fn bench_atomicptr(b: &mut Bencher) { 30 | let ptr = Box::leak(Box::new(10_u32)); 31 | let val = std::sync::atomic::AtomicPtr::::new(ptr); 32 | b.iter(|| { 33 | val.store(ptr, std::sync::atomic::Ordering::SeqCst); 34 | val.load(std::sync::atomic::Ordering::SeqCst); 35 | }); 36 | } 37 | 38 | fn busy_loop(count: usize) -> u64 { 39 | let acc: u64 = (0..count).map(|_| random::() as u64).sum(); 40 | acc 41 | } 42 | -------------------------------------------------------------------------------- /docs/PughConcurrentSkiplist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/prataprc/rdms/613432c249f621fefecdf52c2d2e3bb46cc9b660/docs/PughConcurrentSkiplist.pdf -------------------------------------------------------------------------------- /docs/PughSkiplistPaper.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/prataprc/rdms/613432c249f621fefecdf52c2d2e3bb46cc9b660/docs/PughSkiplistPaper.pdf -------------------------------------------------------------------------------- /docs/RBTreesKim.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/prataprc/rdms/613432c249f621fefecdf52c2d2e3bb46cc9b660/docs/RBTreesKim.pdf -------------------------------------------------------------------------------- /docs/Sedgewick-LLRB.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/prataprc/rdms/613432c249f621fefecdf52c2d2e3bb46cc9b660/docs/Sedgewick-LLRB.pdf -------------------------------------------------------------------------------- /docs/Sedgewick-Talk-Penn2008.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/prataprc/rdms/613432c249f621fefecdf52c2d2e3bb46cc9b660/docs/Sedgewick-Talk-Penn2008.pdf -------------------------------------------------------------------------------- /docs/git2.md: -------------------------------------------------------------------------------- 1 | Concepts 2 | -------- 3 | 4 | object{header,sha1}, blob-object, tree-object, commit-object, tag-object 5 | 6 | references, `refs/heads/`, `refs/remotes/`, `refs/tags/` 7 | 8 | merge, rebase, annotated-commits, action-signature-{name,email,timestamp} 9 | 10 | namespace 11 | work-tree 12 | revision-spec 13 | merge-message 14 | remote, remote-fetch, in-memory-remote 15 | 16 | ignore-rules 17 | 18 | Types 19 | ----- 20 | 21 | AnnotatedCommit, Signature 22 | 23 | Diff, DiffHunk, DiffDelta, ApplyLocation, ApplyOptions 24 | 25 | Repository, ObjectType, Oid, Tree, Commit, Index 26 | Blame, BlameOptions, BlameHunk, BlameIter 27 | 28 | Git as DB 29 | --------- 30 | 31 | * Has to be content addressed database. 32 | * Commit oriented, similar to transactions in regular Key-Value store. 33 | * Commits are time-ordered via parent child relationshipt, a single commit can have multiple parents. 34 | * This also implies that we may not need sequence-numbering. 35 | * Each file can be treated as a document and stored as a blob object-type. 36 | * Document as value, must emit a unique key, that can be treated as file path. 
37 | 38 | ``` 39 | instance-api | new, close, purge 40 | management-api | len, deleted_count, footprint, is_empty, is_spin, to_name, to_seqno, to_stats, validate 41 | read-api | get, get_versions, iter, iter_versions range range_versions, reverse, reverse_versions 42 | write-api | set, set_cas, insert, insert_cas delete, delete_cas, remove, remove_cas 43 | transaction-api | commit 44 | ``` 45 | 46 | Repository 47 | odb, set_odb 48 | blob, blob_path, find_blob 49 | 50 | 51 | Objects 52 | ------- 53 | 54 | * Objects are immutable, signed and compressed-(zlib). 55 | * Commit objects are DAG, which means they can have more than one parent. 56 | * Tag can also be GPG signed. 57 | 58 | 59 | blob [content-size]\0 tree [content-size]\0 commit [content-size]\0 tag [content-size]\0 60 | --------------------- --------------------- ----------------------- -------------------- 61 | Simple text 100644 blob a906cb README tree 1a738d object 0576fa 62 | 100644 blob a874b7 Rakefile parent a11bef type commit 63 | 040000 tree fe8971 lib author Scott Chacon tag v0.1 64 | 1205602288 tagger Scott Chacon 65 | committer Scott Chacon 1205624655 66 | 1205602288 this is my v0.1 tag 67 | first commit 68 | 69 | 70 | +------+ 71 | +----------| Head |------------+ 72 | | +------+ | 73 | | | | 74 | | | | 75 | +--------+ +--------+ +--------+ 76 | | Remote | | Branch | | Tag | 77 | +--------+ +--------+ +--------+ 78 | | | | 79 | | | | 80 | | +--------+ | 81 | +---------| Commit |-----------+ 82 | +--------+ 83 | | 84 | | 85 | | 86 | +--------+ 87 | +---| Tree |---+ 88 | | +--------+ | 89 | +--------|-------+ 90 | | 91 | +--------+ 92 | | Blobs | 93 | +--------+ 94 | 95 | -------------------------------------------------------------------------------- /docs/osdi14-paper-zheng_mai.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/prataprc/rdms/613432c249f621fefecdf52c2d2e3bb46cc9b660/docs/osdi14-paper-zheng_mai.pdf 
-------------------------------------------------------------------------------- /docs/reference.md: -------------------------------------------------------------------------------- 1 | Introduction to LLRB 2 | https://www.geeksforgeeks.org/left-leaning-red-black-tree-insertion/ 3 | 4 | http://www.actapress.com/Abstract.aspx?paperId=453069 5 | 6 | Presentation on torturing databases. 7 | https://www.usenix.org/conference/osdi14/technical-sessions/presentation/zheng_mai 8 | 9 | Left-Leaning Red-Black Trees Considered Harmful ? 10 | http://read.seas.harvard.edu/~kohler/notes/llrb.html 11 | 12 | Log structured file system. 13 | https://people.eecs.berkeley.edu/~brewer/cs262/LFS.pdf 14 | 15 | LSM tree 16 | https://www.cs.umb.edu/~poneil/lsmtree.pdf 17 | 18 | data-structures/algorithm 19 | ========================= 20 | 21 | https://en.wikipedia.org/wiki/Bitcask 22 | -------------------------------------------------------------------------------- /docs/serialize_versions.md: -------------------------------------------------------------------------------- 1 | src/dbs 2 | ====== 3 | 4 | * `dbs::Value` `0x00020001_u32` 5 | * `dbs::Delta` `0x00030001_u32` 6 | * `dbs::Entry` `0x00050001_u32` 7 | * `dbs::NoDiff` `0x00070001_u32` 8 | * `dbs::Binary` `0x00170001_u32` 9 | 10 | src/robt 11 | ======== 12 | 13 | * `robt::Value` `0x000d0001_u32` 14 | * `robt::Delta` `0x00110001_u32` 15 | * `robt::Entry` `0x00130001_u32` 16 | * `robt::Stats` `0x000b0001_u32` 17 | * `robt::MetaItem` `robt/metaitem/0.0.1` 18 | 19 | src/bitmaps 20 | =========== 21 | 22 | * `xorfilter::Xor8` `xor8/0.0.1` 23 | 24 | src/wral 25 | ======== 26 | * `wral::Entry` `0x001d0001_u32` 27 | * `wral::Batch` `0x001f0001_u32` 28 | * `wral::NoState` `0x00250001_u32` 29 | -------------------------------------------------------------------------------- /perf.sh: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env bash 2 | 3 | exec > $1 4 | exec 2>&1 5 | 6 | set -o xtrace 7 | 8 | PROF_DIR=src/bin/rdms/perf-profiles 9 | 10 | # regular benchmark 11 | # date; time cargo +nightly bench -- --nocapture || exit $? 12 | # TODO: date; time cargo +stable bench -- --nocapture || exit $? 13 | 14 | # invoke perf binary 15 | date; time cargo +nightly run --release --bin rdms --features=rdms -- --profile $PROF_DIR/default-llrb.toml llrb 16 | # invoke perf binary, with valgrind 17 | date; valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes cargo +nightly run --release --bin rdms --features=rdms -- --profile $PROF_DIR/default-llrb.toml llrb 18 | -------------------------------------------------------------------------------- /quick_push.sh: -------------------------------------------------------------------------------- 1 | git add . 2 | git commit . -m "development commit" 3 | git push -u origin master 4 | 5 | -------------------------------------------------------------------------------- /run_nightly.sh: -------------------------------------------------------------------------------- 1 | if [ $? -eq 0 ] ; then 2 | echo "cargo test ...................." 3 | cargo test 4 | fi 5 | 6 | #if [ $? -eq 0 ] ; then 7 | # echo "cargo test -- --ignored .................." 8 | # cargo test -- --ignored 9 | #fi 10 | 11 | if [ $? -ne 0 ] ; then 12 | exit 1 13 | fi 14 | 15 | echo "cargo test --release ....................." 16 | RUSTFLAGS=-g cargo test --release 17 | 18 | if [ $? -ne 0 ] ; then 19 | exit 1 20 | fi 21 | 22 | echo "cargo test --release -- --ignored .................." 23 | RUSTFLAGS=-g cargo test --release -- --ignored 24 | 25 | if [ $? -ne 0 ] ; then 26 | exit 1 27 | fi 28 | 29 | echo "cargo test --release shrobt_commit_compact .................." 30 | for i in 0 0 0 0 0 0 0 0 0; do 31 | RUSTFLAGS=-g cargo test --release shrobt_commit_compact; 32 | if [ $? 
-ne 0 ] ; then 33 | exit 1 34 | fi 35 | done 36 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 90 2 | array_width = 80 3 | attr_fn_like_width = 80 4 | chain_width = 80 5 | fn_call_width = 80 6 | single_line_if_else_max_width = 80 7 | struct_lit_width = 50 8 | -------------------------------------------------------------------------------- /src/_archive/dlog_entry_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | use std::io::Write; 4 | 5 | #[test] 6 | fn test_entry() { 7 | let _r_entry = DEntry::::default(); 8 | 9 | let r_entry = DEntry::::new(10, 20); 10 | 11 | { 12 | let entry = DEntry::::new(10, 20); 13 | assert_eq!(r_entry.seqno, entry.seqno); 14 | assert_eq!(r_entry.op, entry.op); 15 | 16 | let (seqno, op) = entry.into_seqno_op(); 17 | assert_eq!(seqno, 10); 18 | assert_eq!(op, 20); 19 | } 20 | 21 | let mut buf = vec![]; 22 | let n = r_entry.encode(&mut buf).unwrap(); 23 | assert_eq!(n, 16); 24 | 25 | { 26 | let mut entry: DEntry = Default::default(); 27 | entry.decode(&buf).unwrap(); 28 | assert_eq!(entry.seqno, r_entry.seqno); 29 | assert_eq!(entry.op, r_entry.op); 30 | } 31 | } 32 | 33 | #[test] 34 | fn test_batch1() { 35 | use crate::wal; 36 | 37 | let batch1: Batch> = Default::default(); 38 | let batch2: Batch> = Default::default(); 39 | assert!(batch1 == batch2); 40 | } 41 | 42 | #[test] 43 | fn test_batch2() { 44 | use crate::wal; 45 | 46 | let validate = |abatch: Batch>| { 47 | abatch 48 | .into_entries() 49 | .unwrap() 50 | .into_iter() 51 | .enumerate() 52 | .for_each(|(i, e)| { 53 | let (seqno, op) = e.into_seqno_op(); 54 | assert_eq!(seqno, (i + 1) as u64); 55 | assert_eq!(op, wal::Op::::new_set(10, 20)); 56 | }) 57 | }; 58 | 59 | let batch = { 60 | let mut batch = Batch::>::default_active(); 61 | 62 | assert_eq!(batch.len().unwrap(), 0); 
63 | 64 | for i in 0..100 { 65 | let op = wal::Op::new_set(10, 20); 66 | batch.add_entry(DEntry::new(i + 1, op)).unwrap(); 67 | } 68 | batch 69 | }; 70 | assert_eq!(batch.to_first_seqno().unwrap(), 1); 71 | assert_eq!(batch.to_last_seqno().unwrap(), 100); 72 | assert_eq!(batch.len().unwrap(), 100); 73 | 74 | validate(batch.clone()); 75 | 76 | let mut buf = vec![]; 77 | let length = batch.encode_active(&mut buf).unwrap(); 78 | assert_eq!(length, 4099); 79 | 80 | let file = { 81 | let mut dir = std::env::temp_dir(); 82 | dir.push("test-dlog-entry-batch2"); 83 | fs::create_dir_all(&dir).unwrap(); 84 | dir.push("batch2.dlog"); 85 | dir.into_os_string() 86 | }; 87 | fs::File::create(&file).unwrap().write(&buf).unwrap(); 88 | 89 | let rbatch = Batch::>::new_refer( 90 | // 91 | 0, length, 1, 100, 92 | ); 93 | let mut fd = fs::File::open(&file).unwrap(); 94 | let abatch = rbatch.into_active(&mut fd).unwrap(); 95 | validate(abatch); 96 | 97 | let mut batch = Batch::>::default_active(); 98 | let n = batch.decode_refer(&buf, 0).unwrap(); 99 | assert_eq!(n, 4099); 100 | match batch { 101 | Batch::Refer { 102 | fpos: 0, 103 | length: 4099, 104 | start_seqno: 1, 105 | last_seqno: 100, 106 | } => (), 107 | Batch::Refer { 108 | fpos, 109 | length, 110 | start_seqno, 111 | last_seqno, 112 | } => panic!("{} {} {} {}", fpos, length, start_seqno, last_seqno), 113 | _ => unreachable!(), 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /src/_archive/dlog_marker.rs: -------------------------------------------------------------------------------- 1 | lazy_static! 
{ 2 | static ref DLOG_BATCH_MARKER: Vec = { 3 | let marker = "செய்வன திருந்தச் செய்"; 4 | marker.as_bytes().to_vec() 5 | }; 6 | } 7 | -------------------------------------------------------------------------------- /src/_archive/fs2_test.rs: -------------------------------------------------------------------------------- 1 | use fs2::FileExt; 2 | 3 | use std::{fs, io}; 4 | 5 | #[test] 6 | fn test_file_lock() { 7 | let file = { 8 | let mut dir = std::env::temp_dir(); 9 | dir.push("fs2-test-write-lock.data"); 10 | dir.into_os_string() 11 | }; 12 | println!("lock experiment with file {:?}", file); 13 | fs::remove_file(&file).ok(); 14 | 15 | // shared lock 1 16 | let fd1 = { 17 | let mut opts = fs::OpenOptions::new(); 18 | opts.append(true) 19 | .create_new(true) 20 | .open(file.clone()) 21 | .unwrap() 22 | }; 23 | fd1.lock_shared().unwrap(); 24 | 25 | // shared lock 2 26 | let fd2 = { 27 | let mut opts = fs::OpenOptions::new(); 28 | opts.write(true).open(file.clone()).unwrap() 29 | }; 30 | match fd2.try_lock_exclusive() { 31 | Ok(_) => panic!("unexpected behaviour!!"), 32 | Err(err) if err.kind() == io::ErrorKind::WouldBlock => (), 33 | Err(err) => panic!("unexpected err: {:?}", err), 34 | } 35 | fd2.lock_shared().unwrap(); 36 | 37 | // shared lock 3 38 | let fd3 = { 39 | let mut opts = fs::OpenOptions::new(); 40 | opts.write(true).open(file.clone()).unwrap() 41 | }; 42 | match fd3.try_lock_exclusive() { 43 | Ok(_) => panic!("unexpected behaviour!!"), 44 | Err(err) if err.kind() == io::ErrorKind::WouldBlock => (), 45 | Err(err) => panic!("unexpected err: {:?}", err), 46 | } 47 | 48 | // unlock as shared lock 2 49 | fd2.unlock().unwrap(); 50 | match fd2.try_lock_exclusive() { 51 | Ok(_) => panic!("unexpected behaviour!!"), 52 | Err(err) if err.kind() == io::ErrorKind::WouldBlock => (), 53 | Err(err) => panic!("unexpected err: {:?}", err), 54 | } 55 | match fd3.try_lock_exclusive() { 56 | Ok(_) => panic!("unexpected behaviour!!"), 57 | Err(err) if err.kind() == 
io::ErrorKind::WouldBlock => (), 58 | Err(err) => panic!("unexpected err: {:?}", err), 59 | } 60 | 61 | // unlock shared lock 1 62 | fd1.unlock().unwrap(); 63 | match fd3.try_lock_exclusive() { 64 | Ok(_) => (), 65 | Err(err) => panic!("unexpected err: {:?}", err), 66 | } 67 | fd3.unlock().unwrap(); 68 | match fd2.try_lock_exclusive() { 69 | Ok(_) => (), 70 | Err(err) => panic!("unexpected err: {:?}", err), 71 | } 72 | match fd1.try_lock_exclusive() { 73 | Ok(_) => panic!("unexpected behaviour!!"), 74 | Err(err) if err.kind() == io::ErrorKind::WouldBlock => (), 75 | Err(err) => panic!("unexpected err: {:?}", err), 76 | } 77 | match fd1.try_lock_shared() { 78 | Ok(_) => panic!("unexpected behaviour!!"), 79 | Err(err) if err.kind() == io::ErrorKind::WouldBlock => (), 80 | Err(err) => panic!("unexpected err: {:?}", err), 81 | } 82 | 83 | fd2.unlock().unwrap(); 84 | } 85 | -------------------------------------------------------------------------------- /src/_archive/panic.rs: -------------------------------------------------------------------------------- 1 | //! Module `panic` handles unimplemented features. 2 | 3 | use std::{borrow::Borrow, ops::RangeBounds}; 4 | 5 | use crate::{ 6 | core::{CommitIterator, Diff, Entry, IndexIter, Reader, Result, Writer}, 7 | error::Error, 8 | }; 9 | 10 | /// Placeholder type, to handle unimplemented features. 
11 | pub struct Panic(String); 12 | 13 | impl Panic { 14 | pub fn new(name: &str) -> Panic { 15 | Panic(name.to_string()) 16 | } 17 | } 18 | 19 | // Write methods 20 | impl Writer for Panic 21 | where 22 | K: Clone + Ord, 23 | V: Clone + Diff, 24 | { 25 | fn set(&mut self, _key: K, _value: V) -> Result>> { 26 | err_at!(NotImplemented, msg:self.0) 27 | } 28 | 29 | fn set_cas(&mut self, _: K, _: V, _: u64) -> Result>> { 30 | err_at!(NotImplemented, msg:self.0) 31 | } 32 | 33 | fn delete(&mut self, _key: &Q) -> Result>> 34 | where 35 | K: Borrow, 36 | Q: ToOwned + Ord + ?Sized, 37 | { 38 | err_at!(NotImplemented, msg:self.0) 39 | } 40 | } 41 | 42 | impl Reader for Panic 43 | where 44 | K: Clone + Ord, 45 | V: Clone + Diff, 46 | { 47 | fn get(&mut self, _: &Q) -> Result> 48 | where 49 | K: Borrow, 50 | Q: Ord + ?Sized, 51 | { 52 | err_at!(NotImplemented, msg:self.0) 53 | } 54 | 55 | fn iter(&mut self) -> Result> { 56 | err_at!(NotImplemented, msg:self.0) 57 | } 58 | 59 | fn range<'a, R, Q>(&'a mut self, _: R) -> Result> 60 | where 61 | K: Borrow, 62 | R: 'a + Clone + RangeBounds, 63 | Q: 'a + Ord + ?Sized, 64 | { 65 | err_at!(NotImplemented, msg:self.0) 66 | } 67 | 68 | fn reverse<'a, R, Q>(&'a mut self, _: R) -> Result> 69 | where 70 | K: Borrow, 71 | R: 'a + Clone + RangeBounds, 72 | Q: 'a + Ord + ?Sized, 73 | { 74 | err_at!(NotImplemented, msg:self.0) 75 | } 76 | 77 | fn get_with_versions(&mut self, _: &Q) -> Result> 78 | where 79 | K: Borrow, 80 | Q: Ord + ?Sized, 81 | { 82 | err_at!(NotImplemented, msg:self.0) 83 | } 84 | 85 | fn iter_with_versions(&mut self) -> Result> { 86 | err_at!(NotImplemented, msg:self.0) 87 | } 88 | 89 | fn range_with_versions<'a, R, Q>(&'a mut self, _: R) -> Result> 90 | where 91 | K: Borrow, 92 | R: 'a + Clone + RangeBounds, 93 | Q: 'a + Ord + ?Sized, 94 | { 95 | err_at!(NotImplemented, msg:self.0) 96 | } 97 | 98 | fn reverse_with_versions<'a, R, Q>(&'a mut self, _: R) -> Result> 99 | where 100 | K: Borrow, 101 | R: 'a + Clone + 
RangeBounds, 102 | Q: 'a + Ord + ?Sized, 103 | { 104 | err_at!(NotImplemented, msg:self.0) 105 | } 106 | } 107 | 108 | impl CommitIterator for Panic 109 | where 110 | K: Clone + Ord, 111 | V: Clone + Diff, 112 | { 113 | fn scan(&mut self, _within: G) -> Result> 114 | where 115 | G: Clone + RangeBounds, 116 | { 117 | err_at!(NotImplemented, msg:self.0) 118 | } 119 | 120 | fn scans(&mut self, _n_shards: usize, _within: G) -> Result>> 121 | where 122 | G: Clone + RangeBounds, 123 | { 124 | err_at!(NotImplemented, msg:self.0) 125 | } 126 | 127 | fn range_scans( 128 | &mut self, 129 | _ranges: Vec, 130 | _within: G, 131 | ) -> Result>> 132 | where 133 | G: Clone + RangeBounds, 134 | N: Clone + RangeBounds, 135 | { 136 | err_at!(NotImplemented, msg:self.0) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /src/_archive/raft_log_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[test] 4 | fn test_state() { 5 | let state: State = Default::default(); 6 | assert_eq!(state.term, NIL_TERM); 7 | assert_eq!(state.committed, 0); 8 | assert_eq!(state.persisted, 0); 9 | assert_eq!(state.config, vec![]); 10 | assert_eq!(state.votedfor, DEFAULT_NODE.to_string()); 11 | 12 | let state = State { 13 | term: 0x1234, 14 | committed: 0x2341, 15 | persisted: 0x3211, 16 | config: vec!["node1".to_string(), "node2".to_string()], 17 | votedfor: DEFAULT_NODE.to_string(), 18 | }; 19 | 20 | let mut buf = vec![]; 21 | assert_eq!(state.encode(&mut buf), 0); 22 | let mut dec_state: State = Default::default(); 23 | assert_eq!(dec_state.decode(&buf), state); 24 | 25 | assert_eq!(dec_state.term, state.term); 26 | assert_eq!(dec_state.committed, state.committed); 27 | assert_eq!(dec_state.persisted, state.persisted); 28 | assert_eq!(dec_state.config, state.config); 29 | assert_eq!(dec_state.votedfor, state.votedfor); 30 | } 31 | 32 | #[test] 33 | fn test_op_type() { 34 | let op_type: 
OpType = From::from(1_u64); 35 | assert_eq!(op_type, OpType::Set); 36 | let op_type: OpType = From::from(2_u64); 37 | assert_eq!(op_type, OpType::SetCAS); 38 | let op_type: OpType = From::from(3_u64); 39 | assert_eq!(op_type, OpType::Delete); 40 | } 41 | 42 | #[test] 43 | fn test_op() { 44 | let mut out = vec![]; 45 | let mut res: Op = Default::default(); 46 | 47 | let op: Op = Op::new_set(34, 43); 48 | op.encode(&mut out).unwrap(); 49 | assert_eq!(Op::::op_type(&out).unwrap(), OpType::Set); 50 | let n = res.decode(&out).expect("op-set decode failed"); 51 | assert_eq!(n, 24); 52 | match res { 53 | Op::Set { key: 34, value: 43 } => (), 54 | _ => unreachable!(), 55 | } 56 | 57 | let op: Op = Op::new_set_cas(-34, -43, 100); 58 | out.resize(0, 0); 59 | op.encode(&mut out).unwrap(); 60 | assert_eq!(Op::::op_type(&out).unwrap(), OpType::SetCAS); 61 | let n = res.decode(&out).expect("op-set-cas decode failed"); 62 | assert_eq!(n, 32); 63 | match res { 64 | Op::SetCAS { 65 | key: -34, 66 | value: -43, 67 | cas: 100, 68 | } => (), 69 | _ => unreachable!(), 70 | } 71 | 72 | let op: Op = Op::new_delete(34); 73 | out.resize(0, 0); 74 | op.encode(&mut out).unwrap(); 75 | assert_eq!(Op::::op_type(&out).unwrap(), OpType::Delete); 76 | let n = res.decode(&out).expect("op-delete decode failed"); 77 | assert_eq!(n, 12); 78 | match res { 79 | Op::Delete { key: 34 } => (), 80 | _ => unreachable!(), 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/_archive/robt_marker.rs: -------------------------------------------------------------------------------- 1 | lazy_static! { 2 | pub(crate) static ref ROOT_MARKER: Vec = { 3 | let marker = "அறம் செய விரும்பு"; 4 | marker.as_bytes().to_vec() 5 | }; 6 | } 7 | -------------------------------------------------------------------------------- /src/_archive/sync.rs: -------------------------------------------------------------------------------- 1 | //! 
Module `sync` implements synchronisation primitive to implement 2 | //! complex index types. 3 | 4 | // TODO: deprecate this module, after implementing Dgm and Backup 5 | 6 | use std::{ffi, mem, sync::Arc}; 7 | 8 | pub struct SyncAccess { 9 | value: T, 10 | } 11 | 12 | impl SyncAccess { 13 | pub fn new(value: T) -> SyncAccess { 14 | SyncAccess { value } 15 | } 16 | } 17 | 18 | impl AsRef for SyncAccess 19 | where 20 | T: AsRef, 21 | { 22 | fn as_ref(&self) -> &U { 23 | self.value.as_ref() 24 | } 25 | } 26 | 27 | impl AsMut for SyncAccess 28 | where 29 | T: AsMut, 30 | { 31 | fn as_mut(&mut self) -> &mut U { 32 | self.value.as_mut() 33 | } 34 | } 35 | 36 | pub struct CCMu { 37 | inner: mem::MaybeUninit>>>, 38 | } 39 | 40 | impl CCMu { 41 | pub fn uninit() -> CCMu { 42 | CCMu { 43 | inner: mem::MaybeUninit::uninit(), 44 | } 45 | } 46 | 47 | pub fn init_with_ptr(value: Box) -> CCMu { 48 | CCMu { 49 | inner: mem::MaybeUninit::new(Arc::new(mem::ManuallyDrop::new(value))), 50 | } 51 | } 52 | 53 | pub fn clone(mu: &CCMu) -> CCMu { 54 | CCMu { 55 | inner: mem::MaybeUninit::new(Arc::clone(unsafe { 56 | mu.inner.as_ptr().as_ref().unwrap() 57 | })), 58 | } 59 | } 60 | 61 | pub fn strong_count(&self) -> usize { 62 | Arc::strong_count(unsafe { self.inner.as_ptr().as_ref().unwrap() }) 63 | } 64 | 65 | pub fn as_mut_ptr(&self) -> *mut ffi::c_void { 66 | let arc_ref = unsafe { self.inner.as_ptr().as_ref().unwrap() }; 67 | let ptr: &ffi::c_void = arc_ref.as_ref().as_ref(); 68 | ptr as *const ffi::c_void as *mut ffi::c_void 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/_archive/sync_writer.rs: -------------------------------------------------------------------------------- 1 | use std::sync::atomic::{ 2 | AtomicU64, 3 | Ordering::{AcqRel, Acquire}, 4 | }; 5 | 6 | /// SyncWriter is used to make sure that only one writer is going to 7 | /// access MVCC index. Calling lock() from more than one thread will 8 | /// cause panic. 
It is better to delegate all write operations to
/// single thread as opposed to serializing the write operations from
/// multiple threads.
#[allow(dead_code)] // TODO: do we really need this type for rdms ??
pub(crate) struct SyncWriter {
    // 0 = unlocked, 1 = locked; only ever toggled by lock() / Fence::drop().
    writers: AtomicU64,
}

#[allow(dead_code)]
impl SyncWriter {
    /// Create a new, unlocked, write fence.
    pub(crate) fn new() -> SyncWriter {
        SyncWriter {
            writers: AtomicU64::new(0),
        }
    }

    /// Acquire the write fence, returning an RAII guard that releases it
    /// on drop.
    ///
    /// Panics if another writer already holds the fence (the 0 -> 1 CAS
    /// fails) — by design, since concurrent writers indicate a caller bug.
    pub(crate) fn lock<'a>(&'a self) -> Fence<'a> {
        if self.writers.compare_exchange(0, 1, AcqRel, Acquire) != Ok(0) {
            panic!("Mvcc cannot have concurrent writers");
        }
        Fence { fence: self }
    }
}

/// Guard returned by `SyncWriter::lock`; releases the fence when dropped.
pub(crate) struct Fence<'a> {
    fence: &'a SyncWriter,
}

impl<'a> Drop for Fence<'a> {
    fn drop(&mut self) {
        // Release must observe the lock held (1 -> 0); anything else means
        // the lock state machine was corrupted.
        if self.fence.writers.compare_exchange(1, 0, AcqRel, Acquire) != Ok(1) {
            // FIX: message was "unepxected situation in spinlock drop" —
            // misspelled, and a stale copy-paste from the spinlock module;
            // this is the sync-writer fence, not the spinlock.
            panic!("unexpected situation in sync-writer drop");
        }
    }
}

#[cfg(test)]
#[path = "sync_writer_test.rs"]
mod sync_writer_test;
--------------------------------------------------------------------------------
/src/_archive/sync_writer_test.rs:
--------------------------------------------------------------------------------
use std::sync::Arc;

use crate::sync_writer::SyncWriter;

#[test]
fn test_sync_writer_single() {
    let mu = SyncWriter::new();
    {
        let _lock = mu.lock();
    }
    // Guard above was dropped, so re-acquiring must succeed.
    let _lock = mu.lock();
}

#[test]
#[should_panic]
fn test_sync_writer_concur() {
    let mu1 = Arc::new(SyncWriter::new());
    let mu2 = Arc::clone(&mu1);
    let _lock1 = mu1.lock();
    // Second concurrent acquisition must panic.
    let _lock2 = mu2.lock();
}
--------------------------------------------------------------------------------
/src/_archive/types_test.rs:
--------------------------------------------------------------------------------
use crate::{
    core::{Diff, Footprint, Serialize},
    types::Empty,
};

#[test] 7 | fn test_bytes_diff() { 8 | let old = "hello world".as_bytes().to_vec(); 9 | let new = "welcome".as_bytes().to_vec(); 10 | let diff = new.diff(&old); 11 | assert_eq!(diff.as_slice(), "hello world".as_bytes()); 12 | 13 | assert_eq!(old, new.merge(&diff)); 14 | } 15 | 16 | #[test] 17 | fn test_bytes_serialize() { 18 | let value = "hello world".as_bytes().to_vec(); 19 | let mut buf = vec![]; 20 | value.encode(&mut buf).unwrap(); 21 | let value_ref = [ 22 | 0, 0, 0, 11, 104, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 23 | ]; 24 | assert_eq!(&value_ref[..], buf.as_slice()); 25 | 26 | let mut out = vec![]; 27 | out.decode(&buf).expect("failed decode"); 28 | assert_eq!(value, out); 29 | } 30 | 31 | #[test] 32 | fn test_bytes_footprint() { 33 | let mut value: Vec = vec![]; 34 | assert_eq!(value.footprint().unwrap(), 0); 35 | 36 | value.extend_from_slice(&[10, 20, 30]); 37 | assert_eq!(value.footprint().unwrap(), 3); 38 | } 39 | 40 | #[test] 41 | fn test_empty_diff() { 42 | let old = Empty; 43 | let new = Empty; 44 | let diff = new.diff(&old); 45 | assert_eq!(diff, Empty); 46 | 47 | assert_eq!(old, new.merge(&diff)); 48 | } 49 | 50 | #[test] 51 | fn test_empty_serialize() { 52 | let value = Empty; 53 | let mut buf = vec![]; 54 | value.encode(&mut buf).unwrap(); 55 | 56 | let mut out = Empty; 57 | out.decode(&buf).expect("failed decode"); 58 | assert_eq!(value, out); 59 | } 60 | 61 | #[test] 62 | fn test_empty_footprint() { 63 | let value = Empty; 64 | assert_eq!(value.footprint().unwrap(), 0); 65 | } 66 | 67 | #[test] 68 | fn test_i32_diff() { 69 | let old = 10_i32; 70 | let new = -20_i32; 71 | let diff = new.diff(&old); 72 | assert_eq!(diff, 10); 73 | 74 | assert_eq!(old, new.merge(&diff)); 75 | } 76 | 77 | #[test] 78 | fn test_i32_serialize() { 79 | let value = 10_i32; 80 | let mut buf = vec![]; 81 | value.encode(&mut buf).unwrap(); 82 | 83 | let mut out: i32 = Default::default(); 84 | out.decode(&buf).expect("failed decode"); 85 | assert_eq!(value, out); 
86 | } 87 | 88 | #[test] 89 | fn test_i32_footprint() { 90 | let value = 0_i32; 91 | assert_eq!(value.footprint().unwrap(), 0); 92 | } 93 | 94 | #[test] 95 | fn test_i64_diff() { 96 | let old = 10_i64; 97 | let new = -20_i64; 98 | let diff = new.diff(&old); 99 | assert_eq!(diff, 10); 100 | 101 | assert_eq!(old, new.merge(&diff)); 102 | } 103 | 104 | #[test] 105 | fn test_i64_serialize() { 106 | let value = 10_i64; 107 | let mut buf = vec![]; 108 | value.encode(&mut buf).unwrap(); 109 | 110 | let mut out: i64 = Default::default(); 111 | out.decode(&buf).expect("failed decode"); 112 | assert_eq!(value, out); 113 | } 114 | 115 | #[test] 116 | fn test_i64_footprint() { 117 | let value = 0_i64; 118 | assert_eq!(value.footprint().unwrap(), 0); 119 | } 120 | -------------------------------------------------------------------------------- /src/_archive/util_test.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fs, 3 | io::{self, Read, Seek, Write}, 4 | path::PathBuf, 5 | }; 6 | 7 | use crate::error::Error; 8 | 9 | use super::*; 10 | 11 | #[test] 12 | fn test_open_file_rw() { 13 | // case 1: try to create empty file. 14 | let dir = PathBuf::new(); 15 | let fd = create_file_a(dir.as_os_str().to_os_string()); 16 | match fd.expect_err("expected invalid-file") { 17 | Error::InvalidFile(_) => (), 18 | err => panic!("{:?}", err), 19 | } 20 | 21 | // case 2: try to create root dir as file. 
22 | let mut dir = PathBuf::new(); 23 | dir.push("/"); 24 | let fd = create_file_a(dir.as_os_str().to_os_string()); 25 | match fd.expect_err("expected invalid-file") { 26 | Error::InvalidFile(_) => (), 27 | err => panic!("{:?}", err), 28 | } 29 | 30 | // case 3: with valid file, reuse: false 31 | let mut dir = std::env::temp_dir(); 32 | dir.push("rust.rdms.util.open_file_rw.txt"); 33 | let file = dir.as_path(); 34 | 35 | fs::remove_file(file).ok(); 36 | 37 | let file = file.as_os_str().to_os_string(); 38 | let mut fd = create_file_a(file.clone()).expect("open-write"); 39 | fd.write("hello world".as_bytes()).expect("write failed"); 40 | fd.seek(io::SeekFrom::Start(1)).expect("seek failed"); 41 | fd.write("i world".as_bytes()).expect("write failed"); 42 | 43 | let txt = fs::read(file).expect("read failed"); 44 | assert_eq!(std::str::from_utf8(&txt).unwrap(), "hello worldi world"); 45 | 46 | // case 4: with valid file, reuse: false, recreate 47 | let mut dir = std::env::temp_dir(); 48 | dir.push("rust.rdms.util.open_file_rw.txt"); 49 | let file = dir.as_path(); 50 | 51 | let file = file.as_os_str().to_os_string(); 52 | let mut fd = create_file_a(file.clone()).expect("open-write"); 53 | fd.write("hello world".as_bytes()).expect("write failed"); 54 | fd.seek(io::SeekFrom::Start(1)).expect("seek failed"); 55 | fd.write("i world".as_bytes()).expect("write failed"); 56 | 57 | let txt = fs::read(file).expect("read failed"); 58 | assert_eq!(std::str::from_utf8(&txt).unwrap(), "hello worldi world"); 59 | 60 | // case 5: with valid file, reuse: true, reuse file. 
61 | let mut dir = std::env::temp_dir(); 62 | dir.push("rust.rdms.util.open_file_rw.txt"); 63 | let file = dir.as_path(); 64 | 65 | let file = file.as_os_str().to_os_string(); 66 | let mut fd = open_file_w(&file).expect("open-write"); 67 | fd.write("hello world".as_bytes()).expect("write failed"); 68 | fd.seek(io::SeekFrom::Start(1)).expect("seek failed"); 69 | fd.write("i world".as_bytes()).expect("write failed"); 70 | 71 | let txt = fs::read(file.clone()).expect("read failed"); 72 | assert_eq!( 73 | std::str::from_utf8(&txt).unwrap(), 74 | "hello worldi worldhello worldi world" 75 | ); 76 | 77 | // case 6: read file. 78 | let mut fd = open_file_r(file.as_ref()).expect("open-read"); 79 | let mut txt = [0_u8; 36]; 80 | fd.read(&mut txt).expect("read failed"); 81 | assert_eq!( 82 | std::str::from_utf8(&txt).unwrap(), 83 | "hello worldi worldhello worldi world" 84 | ); 85 | 86 | fd.seek(io::SeekFrom::Start(1)).expect("seek failed"); 87 | fd.read(&mut txt[0..35]).expect("read failed"); 88 | assert_eq!( 89 | std::str::from_utf8(&txt).unwrap(), 90 | "ello worldi worldhello worldi worldd" 91 | ); 92 | 93 | fd.write("hello world".as_bytes()) 94 | .expect_err("expected write error"); 95 | } 96 | 97 | #[test] 98 | fn test_as_sharded_array() { 99 | for i in 0..100 { 100 | let array: Vec = (0..i).collect(); 101 | for n_shards in 0..100 { 102 | let acc = as_sharded_array(&array, n_shards); 103 | assert_eq!(acc.len(), n_shards); 104 | assert!(acc.len() <= n_shards, "{} {}", acc.len(), n_shards); 105 | if n_shards > 0 { 106 | let res: Vec = { 107 | let iter = acc.iter().flat_map(|shard| shard.to_vec()); 108 | iter.collect() 109 | }; 110 | assert_eq!(array, res); 111 | } 112 | } 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/_archive/vlog_test.rs: -------------------------------------------------------------------------------- 1 | use crate::{core::Footprint, vlog}; 2 | 3 | #[test] 4 | fn test_value() { 5 | let value = 
vlog::Value::new_native(10); 6 | assert_eq!(value.footprint().unwrap(), 0); 7 | // encode 8 | let mut out = vec![]; 9 | assert_eq!(value.encode(&mut out).unwrap(), (None, 12)); 10 | assert_eq!(out, vec![16, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 10]); 11 | // to_native_value 12 | assert_eq!(value.to_native_value(), Some(10)); 13 | 14 | let value = vlog::Value::::new_reference(10, 100, 20); 15 | assert_eq!(value.footprint().unwrap(), 0); 16 | assert_eq!(value.to_native_value(), None); 17 | 18 | let value = vlog::Value::new_native(vec![10_u8, 20, 30]); 19 | assert_eq!(value.footprint().unwrap(), 3); 20 | // encode 21 | let mut out = vec![]; 22 | assert_eq!(value.encode(&mut out).unwrap(), (None, 15)); 23 | assert_eq!(out, vec![16, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 3, 10, 20, 30]); 24 | // to_native_value 25 | assert_eq!(value.to_native_value(), Some(vec![10_u8, 20, 30])); 26 | } 27 | 28 | #[test] 29 | fn test_fetch_value() { 30 | let mut path = std::env::temp_dir(); 31 | path.push("test_fetch_value.data"); 32 | 33 | let value = vlog::Value::new_native(vec![10_u8, 20, 30]); 34 | let mut refb = vec![]; 35 | value.encode(&mut refb).unwrap(); 36 | 37 | std::fs::write(path.clone(), &refb).expect("io failure"); 38 | let out = std::fs::read(path).unwrap(); 39 | assert_eq!(refb, out); 40 | } 41 | 42 | #[test] 43 | fn test_delta() { 44 | let delta = vlog::Delta::::new_native(10); 45 | assert_eq!(delta.footprint().unwrap(), 0); 46 | // encode 47 | let mut out = vec![]; 48 | assert_eq!(delta.encode(&mut out).unwrap(), 12); 49 | assert_eq!(out, vec![0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 10]); 50 | // into_native_delta 51 | assert_eq!(delta.into_native_delta(), Some(10)); 52 | 53 | let delta = vlog::Delta::::new_reference(10, 100, 20); 54 | assert_eq!(delta.footprint().unwrap(), 0); 55 | assert_eq!(delta.into_native_delta(), None); 56 | 57 | let delta = vlog::Delta::>::new_native(vec![10_u8, 20, 30]); 58 | assert_eq!(delta.footprint().unwrap(), 3); 59 | // encode 60 | let mut out = vec![]; 61 | 
assert_eq!(delta.encode(&mut out).unwrap(), 15); 62 | assert_eq!(out, vec![0, 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, 3, 10, 20, 30]); 63 | // into_native_delta 64 | assert_eq!(delta.into_native_delta(), Some(vec![10_u8, 20, 30])); 65 | } 66 | 67 | #[test] 68 | fn test_fetch_delta() { 69 | let mut path = std::env::temp_dir(); 70 | path.push("test_fetch_delta.data"); 71 | 72 | let delta = vlog::Delta::>::new_native(vec![10_u8, 20, 30]); 73 | let mut refb = vec![]; 74 | delta.encode(&mut refb).unwrap(); 75 | 76 | std::fs::write(path.clone(), &refb).expect("io failure"); 77 | let out = std::fs::read(path).unwrap(); 78 | assert_eq!(refb, out); 79 | } 80 | -------------------------------------------------------------------------------- /src/bin/crio/main.rs: -------------------------------------------------------------------------------- 1 | use structopt::StructOpt; 2 | 3 | use std::ffi; 4 | 5 | mod cmd_fetch; 6 | mod types; 7 | 8 | pub const TEMP_DIR_CRIO: &str = "crio"; 9 | 10 | #[derive(Clone, StructOpt)] 11 | struct Opt { 12 | #[structopt(subcommand)] 13 | subcmd: SubCommand, 14 | } 15 | 16 | #[derive(Clone, StructOpt)] 17 | pub enum SubCommand { 18 | /// Fetch the crates_io dump via http, untar the file and extract the tables. 19 | Fetch { 20 | #[structopt(long = "nohttp")] 21 | nohttp: bool, 22 | 23 | #[structopt(long = "nountar")] 24 | nountar: bool, 25 | 26 | #[structopt(long = "nocopy")] 27 | nocopy: bool, 28 | 29 | #[structopt(long = "git")] 30 | git_root: Option, 31 | 32 | #[structopt(short = "c")] 33 | profile: ffi::OsString, 34 | }, 35 | } 36 | 37 | fn main() { 38 | let opts = Opt::from_iter(std::env::args_os()); 39 | 40 | let res = match opts.subcmd { 41 | c @ SubCommand::Fetch { .. 
} => cmd_fetch::handle(cmd_fetch::Opt::from(c)), 42 | }; 43 | 44 | res.map_err(|e| println!("Error: {}", e)).ok(); 45 | } 46 | -------------------------------------------------------------------------------- /src/bin/crio/profile.toml: -------------------------------------------------------------------------------- 1 | temp_dir = "/media/prataprc/hdd1.4tb/tmp" 2 | dump_url = "http://static.crates.io/db-dump.tar.gz" 3 | git_index_dir = "index" 4 | git_analytics_dir = "analytics" 5 | 6 | [git] 7 | loc_repo = "/media/prataprc/hdd1.4tb/db-rdms-crio" 8 | loc_db = "" 9 | user_name = "prataprc" 10 | user_email = "prataprc@gmail.com" 11 | 12 | [git.init] 13 | bare = false 14 | no_reinit = true 15 | permissions = "shared_umask" 16 | description = "repository holds data from crates.io db dump" 17 | 18 | [git.open] 19 | no_search = true 20 | -------------------------------------------------------------------------------- /src/bin/diskio/plot.rs: -------------------------------------------------------------------------------- 1 | use std::path; 2 | use std::time; 3 | 4 | use plotters::prelude::*; 5 | 6 | pub fn latency( 7 | path: path::PathBuf, 8 | title: String, 9 | mut values: Vec, 10 | ) -> Result<(), Box> { 11 | println!("plotting latency graph {}", title); 12 | 13 | let root = BitMapBackend::new(&path, (1024, 768)).into_drawing_area(); 14 | root.fill(&WHITE)?; 15 | 16 | let (xmin, xmax) = (0_u64, values.len() as u64); 17 | let (ymin, ymax) = (0_u64, values.iter().max().cloned().unwrap_or(0)); 18 | let mut scatter_ctx = ChartBuilder::on(&root) 19 | .x_label_area_size(40) 20 | .y_label_area_size(60) 21 | .margin(10) 22 | .caption(&title, ("Arial", 30).into_font()) 23 | .build_cartesian_2d(xmin..xmax, ymin..ymax)?; 24 | scatter_ctx 25 | .configure_mesh() 26 | .disable_x_mesh() 27 | .disable_y_mesh() 28 | .label_style(("Arial", 15).into_font()) 29 | .x_desc("N") 30 | .y_desc("Millisecond") 31 | .axis_desc_style(("Arial", 20).into_font()) 32 | .draw()?; 33 | 
scatter_ctx.draw_series( 34 | values 35 | .iter() 36 | .enumerate() 37 | .map(|(i, l)| Circle::new((i as u64, *l), 2, RED.filled())), 38 | )?; 39 | 40 | values.sort(); 41 | let off = (values.len() as f64 * 0.99) as usize; 42 | let p99 = time::Duration::from_nanos(values[off] * 1000); 43 | println!("99th percentile latency: {:?}", p99); 44 | Ok(()) 45 | } 46 | 47 | pub fn throughput( 48 | path: path::PathBuf, 49 | title: String, 50 | mut values: Vec, 51 | ) -> Result<(), Box> { 52 | println!("plotting throughput graph {}", title); 53 | 54 | values.insert(0, 0); 55 | let throughputs: Vec = 56 | values.clone().into_iter().map(|x| (x as f64) / (1024_f64 * 1024_f64)).collect(); 57 | 58 | let root = BitMapBackend::new(&path, (1024, 768)).into_drawing_area(); 59 | root.fill(&WHITE)?; 60 | 61 | let (xmin, xmax) = (0_u64, values.len() as u64); 62 | let (ymin, ymax) = (0_f64, values.iter().max().cloned().unwrap_or(0) as f64); 63 | let ymax = ymax / (1024_f64 * 1024_f64); 64 | let ymax = ymax + (ymax / 3_f64); 65 | let mut cc = ChartBuilder::on(&root) 66 | .x_label_area_size(40_i32) 67 | .y_label_area_size(60_i32) 68 | .margin(10_i32) 69 | .caption(&title, ("Arial", 30).into_font()) 70 | .build_cartesian_2d(xmin..xmax, ymin..ymax)?; 71 | 72 | cc.configure_mesh() 73 | .bold_line_style(&WHITE) 74 | .label_style(("Arial", 15).into_font()) 75 | .x_desc("Seconds") 76 | .y_desc("Throughput MB/sec") 77 | .axis_desc_style(("Arial", 20).into_font()) 78 | .draw()?; 79 | 80 | cc.draw_series(LineSeries::new( 81 | throughputs.into_iter().enumerate().map(|(i, value)| (i as u64, value)), 82 | &RED, 83 | ))?; 84 | 85 | Ok(()) 86 | } 87 | -------------------------------------------------------------------------------- /src/bin/diskio/stats.rs: -------------------------------------------------------------------------------- 1 | use std::convert::TryInto; 2 | use std::time; 3 | 4 | use rdms::{err_at, Error, Result}; 5 | 6 | pub struct Stats { 7 | tp_second: time::SystemTime, 8 | tp_current: 
u64, 9 | pub file_size: u64, 10 | pub sync_latencies: Vec, 11 | pub throughputs: Vec, 12 | } 13 | 14 | impl Stats { 15 | pub fn new() -> Stats { 16 | Stats { 17 | tp_second: time::SystemTime::now(), 18 | tp_current: 0, 19 | sync_latencies: vec![], 20 | throughputs: vec![], 21 | file_size: Default::default(), 22 | } 23 | } 24 | 25 | pub fn click(&mut self, start: time::SystemTime, size: u64) -> Result<()> { 26 | if err_at!(Fatal, self.tp_second.elapsed())?.as_secs() == 1 { 27 | self.throughputs.push(self.tp_current); 28 | self.tp_second = time::SystemTime::now(); 29 | self.tp_current = 0; 30 | } else { 31 | self.tp_current += size; 32 | } 33 | self.sync_latencies 34 | .push(err_at!(Fatal, start.elapsed())?.as_micros().try_into().unwrap()); 35 | 36 | Ok(()) 37 | } 38 | 39 | pub fn join(&mut self, other: Stats) { 40 | self.sync_latencies.extend_from_slice(&other.sync_latencies); 41 | self.throughputs.resize(other.throughputs.len(), 0); 42 | self.throughputs 43 | .iter_mut() 44 | .zip(other.throughputs.iter()) 45 | .for_each(|(x, y)| *x += *y); 46 | self.file_size += other.file_size; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/bin/html/main.rs: -------------------------------------------------------------------------------- 1 | use structopt::StructOpt; 2 | 3 | use std::{convert::TryFrom, ffi, fs}; 4 | 5 | use rdms::{err_at, Error, Result}; 6 | 7 | pub const TEMP_DIR_CRIO: &str = "crio"; 8 | 9 | #[derive(Clone, StructOpt)] 10 | struct Opt { 11 | #[structopt(long = "parsec")] 12 | parsec: bool, 13 | 14 | file: Option, 15 | //#[structopt(subcommand)] 16 | //subcmd: SubCommand, 17 | } 18 | 19 | //#[derive(Clone, StructOpt)] 20 | //pub enum SubCommand { 21 | // /// Fetch the crates_io dump via http, untar the file and extract the tables. 
22 | // Fetch { 23 | // #[structopt(long = "nohttp")] 24 | // nohttp: bool, 25 | // 26 | // #[structopt(long = "nountar")] 27 | // nountar: bool, 28 | // 29 | // #[structopt(long = "nocopy")] 30 | // nocopy: bool, 31 | // 32 | // #[structopt(long = "git")] 33 | // git_root: Option, 34 | // 35 | // #[structopt(short = "c")] 36 | // profile: ffi::OsString, 37 | // }, 38 | //} 39 | 40 | fn main() { 41 | let opts = Opt::from_iter(std::env::args_os()); 42 | 43 | let res = if opts.parsec { 44 | //let parser = html::new_parser().unwrap(); 45 | //parser.pretty_print(""); 46 | Ok(()) 47 | } else if let Some(file) = opts.file.clone() { 48 | dom_list(file, opts) 49 | } else { 50 | Ok(()) 51 | }; 52 | 53 | match res { 54 | Ok(()) => (), 55 | Err(err) => println!("Error: {}", err), 56 | } 57 | } 58 | 59 | fn dom_list(file: ffi::OsString, _opts: Opt) -> Result<()> { 60 | let text = { 61 | let data = err_at!(IOError, fs::read(&file))?; 62 | let text = err_at!(FailConvert, std::str::from_utf8(&data))?.to_string(); 63 | text.trim().to_string() 64 | }; 65 | let doc = scraper::html::Html::parse_document(&text); 66 | let body: scraper::ElementRef = doc 67 | .root_element() 68 | .select(&scraper::selector::Selector::try_from("body").unwrap()) 69 | .next() 70 | .unwrap(); 71 | 72 | let text_iter = body.text().filter_map(|t| match t.trim() { 73 | "" => None, 74 | s => Some(s), 75 | }); 76 | 77 | for t in text_iter { 78 | println!("{}", t); 79 | } 80 | 81 | Ok(()) 82 | } 83 | -------------------------------------------------------------------------------- /src/bin/nltk/main.rs: -------------------------------------------------------------------------------- 1 | use structopt::StructOpt; 2 | 3 | use std::ffi; 4 | 5 | // use rdms::{err_at, Error, Result}; 6 | 7 | pub const TEMP_DIR_CRIO: &str = "crio"; 8 | 9 | #[derive(Clone, StructOpt)] 10 | struct Opt { 11 | #[structopt(long = "words")] 12 | words: bool, 13 | 14 | #[structopt(long = "zimf")] 15 | zimf: Option, 16 | 17 | #[structopt(long = 
"threads", default_value = "64")] 18 | pool_size: usize, 19 | //#[structopt(subcommand)] 20 | //subcmd: SubCommand, 21 | } 22 | 23 | //#[derive(Clone, StructOpt)] 24 | //pub enum SubCommand { 25 | // /// Fetch the crates_io dump via http, untar the file and extract the tables. 26 | // Fetch { 27 | // #[structopt(long = "nohttp")] 28 | // nohttp: bool, 29 | // 30 | // #[structopt(long = "nountar")] 31 | // nountar: bool, 32 | // 33 | // #[structopt(long = "nocopy")] 34 | // nocopy: bool, 35 | // 36 | // #[structopt(long = "git")] 37 | // git_root: Option, 38 | // 39 | // #[structopt(short = "c")] 40 | // profile: ffi::OsString, 41 | // }, 42 | //} 43 | 44 | fn main() { 45 | //let opts = Opt::from_iter(std::env::args_os()); 46 | 47 | //let res = if let Some(zim_file) => opts.zimf { 48 | // work_zimf(zim_file.clone(), opts) 49 | //}; 50 | 51 | //match res { 52 | // Ok(()) => (), 53 | // Err(err) => println!("Error: {}", err), 54 | //} 55 | } 56 | 57 | //fn work_zimf(zim_file: ffi::OsString, opts: Opt) -> Result<()> { 58 | // let z = Zimf::open(zim_file.clone(), opts.pool_size).unwrap(); 59 | // Ok(()) 60 | //} 61 | // 62 | //fn dom_list(file: ffi::OsString, _opts: Opt) -> Result<()> { 63 | // let text = { 64 | // let data = err_at!(IOError, fs::read(&file))?; 65 | // let text = err_at!(FailConvert, std::str::from_utf8(&data))?.to_string(); 66 | // text.trim().to_string() 67 | // }; 68 | // let doc = scraper::html::Html::parse_document(&text); 69 | // let body: scraper::ElementRef = doc 70 | // .root_element() 71 | // .select(&scraper::selector::Selector::try_from("body").unwrap()) 72 | // .next() 73 | // .unwrap(); 74 | // 75 | // let text_iter = body.text().filter_map(|t| match t.trim() { 76 | // "" => None, 77 | // s => Some(s), 78 | // }); 79 | // 80 | // for t in text_iter { 81 | // println!("{}", t); 82 | // } 83 | // 84 | // Ok(()) 85 | //} 86 | -------------------------------------------------------------------------------- /src/bin/pms/cmd_clone.rs: 
-------------------------------------------------------------------------------- 1 | use std::{convert::TryFrom, path}; 2 | 3 | use crate::{h, Config, Handler, SubCommand}; 4 | 5 | use rdms::{err_at, git::repo, Error, Result}; 6 | 7 | #[derive(Clone)] 8 | pub struct Handle { 9 | pub exclude_dirs: Vec, 10 | pub src_dir: path::PathBuf, 11 | pub dst_dir: path::PathBuf, 12 | } 13 | 14 | impl TryFrom for Handle { 15 | type Error = Error; 16 | 17 | fn try_from(subcmd: crate::SubCommand) -> Result { 18 | let opt = match subcmd { 19 | SubCommand::Clone { src_dir, dst_dir } => Handle { 20 | exclude_dirs: Vec::default(), 21 | src_dir: src_dir.into(), 22 | dst_dir: dst_dir.into(), 23 | }, 24 | _ => unreachable!(), 25 | }; 26 | 27 | Ok(opt) 28 | } 29 | } 30 | 31 | impl Handler for Handle { 32 | fn to_scan_dirs(&self) -> Vec { 33 | vec![self.src_dir.clone()] 34 | } 35 | 36 | fn to_exclude_dirs(&self) -> Vec { 37 | self.exclude_dirs.to_vec() 38 | } 39 | } 40 | 41 | impl Handle { 42 | fn update_with_cfg(mut self, cfg: &Config) -> Self { 43 | self.exclude_dirs.extend_from_slice(&cfg.scan.exclude_dirs); 44 | self 45 | } 46 | } 47 | 48 | pub fn handle(mut h: Handle, cfg: Config) -> Result<()> { 49 | h = h.update_with_cfg(&cfg); 50 | 51 | let ws = h::WalkState::new(h.clone()).scan()?; 52 | 53 | let src_dir = ws.to_scan_dir(); 54 | let mut repos: Vec = ws.into_repositories()?; 55 | 56 | repos.sort_unstable_by_key(|r| r.to_loc()); 57 | 58 | for repo in repos.into_iter() { 59 | let src_loc = repo.to_loc(); 60 | let dst_loc: path::PathBuf = { 61 | let dst = err_at!(Fatal, src_loc.strip_prefix(&src_dir))?.to_path_buf(); 62 | [h.dst_dir.clone(), dst].iter().collect() 63 | }; 64 | repo::clone(src_loc, dst_loc)?; 65 | } 66 | 67 | Ok(()) 68 | } 69 | -------------------------------------------------------------------------------- /src/bin/pms/cmd_excluded.rs: -------------------------------------------------------------------------------- 1 | use std::{convert::TryFrom, path}; 2 | 3 | use 
crate::{h, Config, Handler, SubCommand};

use rdms::{git::repo, trie, util::files, Error, Result};

/// Handle for the `excluded` sub-command: list repositories that were found
/// under the scan directories but are filtered out by `exclude_dirs`.
#[derive(Clone)]
pub struct Handle {
    pub scan_dirs: Vec<path::PathBuf>,
    pub exclude_dirs: Vec<path::PathBuf>,
}

impl TryFrom<crate::SubCommand> for Handle {
    type Error = Error;

    fn try_from(subcmd: crate::SubCommand) -> Result<Handle> {
        let handle = match subcmd {
            SubCommand::Excluded { scan_dir } => Handle {
                scan_dirs: scan_dir.map(|d| vec![d.into()]).unwrap_or_else(|| vec![]),
                exclude_dirs: Vec::default(),
            },
            // main.rs routes only `Excluded` variants to this conversion.
            _ => unreachable!(),
        };

        Ok(handle)
    }
}

impl Handler for Handle {
    fn to_scan_dirs(&self) -> Vec<path::PathBuf> {
        self.scan_dirs.to_vec()
    }

    fn to_exclude_dirs(&self) -> Vec<path::PathBuf> {
        self.exclude_dirs.to_vec()
    }
}

impl Handle {
    /// Fold configuration-file settings into the command-line handle.
    fn update_with_cfg(mut self, cfg: &Config) -> Self {
        self.scan_dirs.extend_from_slice(&cfg.scan.scan_dirs);
        self.exclude_dirs.extend_from_slice(&cfg.scan.exclude_dirs);

        self
    }
}

/// Scan, collect every repository from the trie index, keep only the
/// excluded ones and print them ordered by last-commit date.
pub fn handle(mut h: Handle, cfg: Config) -> Result<()> {
    h = h.update_with_cfg(&cfg);

    let index = h::WalkState::new(h.clone()).scan()?.into_trie();

    let mut repos: Vec<repo::Repo> = index
        .walk(Vec::<repo::Repo>::default(), |repos, _, _, value, _, _| {
            value.map(|repo| repos.push(repo.clone()));
            Ok(trie::WalkRes::Ok)
        })?
        .into_iter()
        .filter(|r| files::is_excluded(&r.to_loc(), &h.to_exclude_dirs()))
        .collect();

    // NOTE(review): `unwrap()` assumes every repository yields a last-commit
    // date; confirm `to_last_commit_date` cannot fail for empty repos.
    repos.sort_unstable_by_key(|r| r.to_last_commit_date(None).unwrap());

    for repo in repos.into_iter() {
        println!("excluded {:?}", repo.to_loc())
    }

    Ok(())
}

// ----------------------------------------------------------------------------
// src/bin/pms/config.rs
// ----------------------------------------------------------------------------

use serde::Deserialize;

use std::path;

use rdms::util::files;

// TODO: implement glob-filtering for excluded_dirs and include_dirs

/// In-memory configuration, normalised from the on-disk TOML form.
#[derive(Clone, Default)]
pub struct Config {
    pub hot: Option<usize>,  // in months — TODO confirm numeric type
    pub cold: Option<usize>, // in months — TODO confirm numeric type
    pub scan: Scan,
}

/// Which directories to scan and which to exclude.
#[derive(Clone, Default)]
pub struct Scan {
    pub scan_dirs: Vec<path::PathBuf>,
    pub exclude_dirs: Vec<path::PathBuf>,
}

/// Raw deserialized form of `pms.toml`; converted into [Config].
#[derive(Clone, Deserialize)]
pub struct TomlConfig {
    hot: Option<usize>,  // in months
    cold: Option<usize>, // in months
    scan: Option<TomlScan>,
}

impl From<TomlConfig> for Config {
    fn from(cfg: TomlConfig) -> Config {
        Config {
            hot: cfg.hot,
            cold: cfg.cold,
            scan: cfg.scan.into(),
        }
    }
}

#[derive(Clone, Debug, Deserialize)]
pub struct TomlScan {
    scan_dirs: Option<Vec<path::PathBuf>>,
    exclude_dirs: Option<Vec<path::PathBuf>>,
}

impl From<Option<TomlScan>> for Scan {
    fn from(toml_scan: Option<TomlScan>) -> Scan {
        match toml_scan {
            Some(toml_scan) => {
                // Drop scan-dirs that cannot be canonicalized (missing paths).
                let scan_dirs = toml_scan
                    .scan_dirs
                    .unwrap_or_else(|| vec![])
                    .iter()
                    .filter_map(files::canonicalize)
                    .collect();

                Scan {
                    scan_dirs,
                    exclude_dirs: toml_scan.exclude_dirs.unwrap_or_else(|| vec![]),
                }
            }
            None => Scan { scan_dirs: vec![], exclude_dirs: vec![] },
        }
    }
}

// src/bin/pms/h.rs:
-------------------------------------------------------------------------------- 1 | use std::{ffi, fs, path}; 2 | 3 | use rdms::{err_at, git::repo, trie, util::files, Error, Result}; 4 | 5 | use crate::Handler; 6 | 7 | #[derive(Clone)] 8 | pub struct WalkState 9 | where 10 | H: Handler, 11 | { 12 | scan_dir: path::PathBuf, 13 | h: H, 14 | repos: Vec, 15 | } 16 | 17 | impl WalkState 18 | where 19 | H: Handler + Clone, 20 | { 21 | pub fn new(handler: H) -> Self { 22 | WalkState { 23 | scan_dir: path::PathBuf::default(), 24 | h: handler, 25 | repos: Vec::default(), 26 | } 27 | } 28 | 29 | pub fn to_scan_dir(&self) -> path::PathBuf { 30 | self.scan_dir.clone() 31 | } 32 | 33 | pub fn scan(mut self) -> Result { 34 | let mut iter = self.h.to_scan_dirs().into_iter(); 35 | loop { 36 | match iter.next() { 37 | Some(scan_dir) => { 38 | self.scan_dir = scan_dir.clone(); 39 | { 40 | if let Ok(repo) = repo::Repo::from_loc(&scan_dir) { 41 | self.repos.push(repo); 42 | } 43 | } 44 | match files::walk(&scan_dir, self.clone(), check_dir_entry) { 45 | Ok(ws) => break Ok(ws), 46 | Err(err) => println!("skip scan_dir {:?}, {}", scan_dir, err), 47 | }; 48 | } 49 | None => break err_at!(InvalidFile, msg: "invalid scan dirs"), 50 | } 51 | } 52 | } 53 | 54 | pub fn into_repositories(self) -> Result> { 55 | let index = self.clone().into_trie(); 56 | 57 | let mut repos: Vec = index 58 | .walk(Vec::::default(), |repos, _, _, value, _, _| { 59 | value.map(|repo| repos.push(repo.clone())); 60 | Ok(trie::WalkRes::Ok) 61 | })? 
62 | .into_iter() 63 | .filter(|r| !files::is_excluded(&r.to_loc(), &self.h.to_exclude_dirs())) 64 | .collect(); 65 | 66 | repos.sort_unstable_by_key(|r| r.to_last_commit_date(None).unwrap()); 67 | 68 | Ok(repos) 69 | } 70 | 71 | pub fn into_trie(self) -> trie::Trie { 72 | self.repos.clone().into_iter().fold(trie::Trie::new(), |mut index, repo| { 73 | let comps: Vec = path::PathBuf::from(&repo.to_loc()) 74 | .components() 75 | .map(|c| c.as_os_str().to_os_string()) 76 | .collect(); 77 | index.set(&comps, repo); 78 | index 79 | }) 80 | } 81 | } 82 | 83 | fn check_dir_entry( 84 | walk_state: &mut WalkState, 85 | parent: &path::Path, 86 | entry: &fs::DirEntry, 87 | _depth: usize, 88 | _breath: usize, 89 | ) -> Result 90 | where 91 | H: Handler, 92 | { 93 | if let Some(".git") = entry.file_name().to_str() { 94 | Ok(files::WalkRes::SkipDir) 95 | } else { 96 | if let Ok(repo) = repo::Repo::from_entry(parent, entry) { 97 | walk_state.repos.push(repo); 98 | } 99 | Ok(files::WalkRes::Ok) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/bin/pms/main.rs: -------------------------------------------------------------------------------- 1 | use structopt::StructOpt; 2 | 3 | use std::{convert::TryFrom, ffi, path}; 4 | 5 | use rdms::{util::files, Result}; 6 | 7 | mod cmd_clone; 8 | mod cmd_excluded; 9 | mod cmd_status; 10 | mod config; 11 | mod h; 12 | 13 | use config::{Config, TomlConfig}; 14 | 15 | /// Options for cmd 16 | #[derive(StructOpt)] 17 | pub struct Opt { 18 | #[structopt( 19 | long = "toml", 20 | help = "Location to config file for processing git repositories" 21 | )] 22 | toml: Option, 23 | 24 | #[structopt( 25 | long = "db", 26 | help = "Location to db, where pms database is persisted on disk" 27 | )] 28 | db: Option, 29 | 30 | #[structopt(subcommand)] 31 | subcmd: SubCommand, 32 | } 33 | 34 | #[derive(Clone, StructOpt)] 35 | pub enum SubCommand { 36 | /// Status subcommand, to scan local git repositories. 
37 | Status { 38 | #[structopt( 39 | long = "path", 40 | help = "root path to start looking for git repositories" 41 | )] 42 | scan_dir: Option, 43 | 44 | #[structopt( 45 | long = "ignored", 46 | help = "included ignored files in git2::DiffOptions" 47 | )] 48 | ignored: bool, 49 | 50 | #[structopt(long = "force_color", help = "force color for non-terminal devices")] 51 | force_color: bool, 52 | 53 | #[structopt(long = "states", help = "list states of a repository")] 54 | states: bool, 55 | }, 56 | /// Excluded subcommand, to list repositories detected under but excluded. 57 | Excluded { 58 | #[structopt( 59 | long = "path", 60 | help = "root path to start looking for git repositories" 61 | )] 62 | scan_dir: Option, 63 | }, 64 | /// Clone subcommand, to clone repositories found in to . 65 | /// 66 | /// As and when required new directories shall be created in . 67 | Clone { 68 | #[structopt(long = "src", help = "clone repositories from specified source")] 69 | src_dir: ffi::OsString, 70 | 71 | #[structopt(long = "dst", help = "clone repositories into specified destin.")] 72 | dst_dir: ffi::OsString, 73 | }, 74 | } 75 | 76 | pub trait Handler { 77 | fn to_scan_dirs(&self) -> Vec; 78 | 79 | fn to_exclude_dirs(&self) -> Vec; 80 | } 81 | 82 | fn main() { 83 | let opts = Opt::from_iter(std::env::args_os()); 84 | 85 | let res = handle(opts); 86 | res.map_err(|e| println!("Error: {}", e)).ok(); 87 | } 88 | 89 | fn handle(opts: Opt) -> Result<()> { 90 | let cfg: Config = { 91 | let loc_toml = files::find_config(opts.toml.clone(), &["pms.toml", ".pms.toml"]); 92 | match loc_toml.as_ref() { 93 | Some(loc_toml) => files::load_toml::<_, TomlConfig>(loc_toml)?.into(), 94 | None => Config::default(), 95 | } 96 | }; 97 | 98 | handle_subcmd(opts, cfg) 99 | } 100 | 101 | fn handle_subcmd(opts: Opt, cfg: Config) -> Result<()> { 102 | match opts.subcmd { 103 | c @ SubCommand::Status { .. 
} => { 104 | cmd_status::handle(cmd_status::Handle::try_from(c)?, cfg) 105 | } 106 | c @ SubCommand::Excluded { .. } => { 107 | cmd_excluded::handle(cmd_excluded::Handle::try_from(c)?, cfg) 108 | } 109 | c @ SubCommand::Clone { .. } => { 110 | cmd_clone::handle(cmd_clone::Handle::try_from(c)?, cfg) 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/bin/rdms/cmd_git.rs: -------------------------------------------------------------------------------- 1 | use std::ffi; 2 | 3 | use rdms::{err_at, Error, Result}; 4 | 5 | use crate::SubCommand; 6 | 7 | pub struct Opt { 8 | pub loc_repo: Option, 9 | pub loc_db: Option, 10 | pub sha1_file: Option, 11 | pub sha1_text: Option, 12 | } 13 | 14 | impl From for Opt { 15 | fn from(subcmd: crate::SubCommand) -> Opt { 16 | match subcmd { 17 | SubCommand::Git { loc_repo, loc_db, sha1_file, sha1_text } => { 18 | Opt { loc_repo, loc_db, sha1_file, sha1_text } 19 | } 20 | _ => unreachable!(), 21 | } 22 | } 23 | } 24 | 25 | pub fn handle(opts: Opt) -> Result<()> { 26 | if let Some(sha1_file) = opts.sha1_file { 27 | return handle_sha1_file(sha1_file); 28 | } else if let Some(sha1_text) = opts.sha1_text { 29 | return handle_sha1_text(sha1_text); 30 | } 31 | 32 | Ok(()) 33 | } 34 | 35 | fn handle_sha1_file(sha1_file: ffi::OsString) -> Result<()> { 36 | use sha1::{Digest, Sha1}; 37 | use std::fs; 38 | 39 | let git_oid = 40 | err_at!(FailGitapi, git2::Oid::hash_file(git2::ObjectType::Blob, &sha1_file))?; 41 | 42 | let mut hasher = Sha1::new(); 43 | let data = { 44 | let payload = err_at!(IOError, fs::read(&sha1_file))?; 45 | let mut header = format!("blob {}\0", payload.len()).as_bytes().to_vec(); 46 | header.extend(&payload); 47 | header 48 | }; 49 | hasher.update(&data); 50 | let our_oid = err_at!(FailGitapi, git2::Oid::from_bytes(&hasher.finalize()))?; 51 | 52 | println!("git-sha1: {}", git_oid.to_string()); 53 | println!("our-sha1: {}", our_oid.to_string()); 54 | 55 | 
Ok(()) 56 | } 57 | 58 | fn handle_sha1_text(sha1_text: String) -> Result<()> { 59 | use sha1::{Digest, Sha1}; 60 | 61 | let mut hasher = Sha1::new(); 62 | let data = { 63 | let payload = sha1_text.as_bytes(); 64 | let mut header = format!("blob {}\0", payload.len()).as_bytes().to_vec(); 65 | header.extend(payload); 66 | header 67 | }; 68 | hasher.update(&data); 69 | let oid = err_at!(FailGitapi, git2::Oid::from_bytes(&hasher.finalize()))?; 70 | 71 | println!("our-sha1: {}", oid.to_string()); 72 | 73 | Ok(()) 74 | } 75 | -------------------------------------------------------------------------------- /src/bin/rdms/cmd_perf.rs: -------------------------------------------------------------------------------- 1 | use rand::{prelude::random, rngs::StdRng}; 2 | 3 | use rdms::Result; 4 | 5 | use crate::SubCommand; 6 | 7 | pub trait Generate { 8 | fn gen_key(&self, rng: &mut StdRng) -> T; 9 | 10 | fn gen_value(&self, rng: &mut StdRng) -> T; 11 | } 12 | 13 | pub struct Opt { 14 | pub seed: u64, 15 | pub profile: String, 16 | pub module: String, 17 | } 18 | 19 | impl From for Opt { 20 | fn from(subcmd: crate::SubCommand) -> Opt { 21 | match subcmd { 22 | SubCommand::Perf { seed, profile, module } => Opt { seed, profile, module }, 23 | _ => unreachable!(), 24 | } 25 | } 26 | } 27 | 28 | pub fn perf(mut opts: Opt) -> Result<()> { 29 | if opts.seed == 0 { 30 | opts.seed = random(); 31 | } 32 | 33 | match opts.module.as_str() { 34 | "btree" | "btree_map" | "btree-map" => crate::perf_btree_map::perf(opts).unwrap(), 35 | "llrb" => crate::perf_llrb::perf(opts)?, 36 | "lmdb" => crate::perf_lmdb::perf(opts)?, 37 | "robt" => crate::perf_robt::perf(opts)?, 38 | "wral" => crate::perf_wral::perf(opts)?, 39 | module => println!("rdms: error invalid module {}", module), 40 | } 41 | 42 | Ok(()) 43 | } 44 | -------------------------------------------------------------------------------- /src/bin/rdms/main.rs: -------------------------------------------------------------------------------- 1 
| use structopt::StructOpt; 2 | 3 | use std::ffi; 4 | 5 | use rdms::Result; 6 | 7 | mod cmd_git; 8 | mod cmd_perf; 9 | 10 | mod perf_btree_map; 11 | mod perf_llrb; 12 | mod perf_lmdb; 13 | mod perf_robt; 14 | mod perf_wral; 15 | 16 | /// Options for cmd 17 | #[derive(Clone, StructOpt)] 18 | pub struct Opt { 19 | #[structopt(subcommand)] 20 | subcmd: SubCommand, 21 | } 22 | 23 | #[derive(Clone, StructOpt)] 24 | pub enum SubCommand { 25 | /// perf-subcommand, to execute a performance profile, to measure algorithms. 26 | Perf { 27 | #[structopt(long = "seed", default_value = "0")] 28 | seed: u64, 29 | 30 | #[structopt(long = "profile", default_value = "")] 31 | profile: String, 32 | 33 | #[structopt(short = "m", long = "module", default_value = "llrb")] 34 | module: String, 35 | }, 36 | /// git-subcommand, to play with git and dba systems. 37 | Git { 38 | #[structopt(long = "repo", help = "location of git repository")] 39 | loc_repo: Option, 40 | 41 | #[structopt( 42 | long = "db", 43 | help = "db-path within git repository, refer " 44 | )] 45 | loc_db: Option, 46 | 47 | #[structopt(long = "sha1-file", help = "generate SHA1 hash for file's content")] 48 | sha1_file: Option, 49 | 50 | #[structopt(long = "sha1", help = "generate SHA1 hash for text")] 51 | sha1_text: Option, 52 | }, 53 | } 54 | 55 | fn main() { 56 | let opts = Opt::from_iter(std::env::args_os()); 57 | 58 | let res = handle_subcmd(opts); 59 | res.map_err(|e| println!("Error: {}", e)).ok(); 60 | } 61 | 62 | fn handle_subcmd(opts: Opt) -> Result<()> { 63 | match opts.subcmd { 64 | c @ SubCommand::Perf { .. } => cmd_perf::perf(cmd_perf::Opt::from(c)), 65 | c @ SubCommand::Git { .. 
} => cmd_git::handle(cmd_git::Opt::from(c)), 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/bin/rdms/perf-profiles/default-btree-map.toml: -------------------------------------------------------------------------------- 1 | key_type = "u64" 2 | value_type = "u64" 3 | key_size = 16 4 | value_size = 16 5 | loads = 10_000_000 6 | sets = 0 7 | rems = 0 8 | gets = 10_000_000 9 | -------------------------------------------------------------------------------- /src/bin/rdms/perf-profiles/default-llrb.toml: -------------------------------------------------------------------------------- 1 | key_type = "u64" 2 | value_type = "u64" 3 | key_size = 16 4 | value_size = 16 5 | spin = false 6 | cas = false 7 | loads = 10_000_000 8 | sets = 1_000_000 9 | ins = 1_000_000 10 | rems = 100_000 11 | dels = 0 12 | gets = 1_000_000 13 | writers = 1 14 | readers = 16 15 | validate = true 16 | -------------------------------------------------------------------------------- /src/bin/rdms/perf-profiles/default-lmdb.toml: -------------------------------------------------------------------------------- 1 | name = "perf-lmdb" 2 | dir = "./perf-data" 3 | key_size = 16 4 | value_size = 16 5 | loads = 10_000_000 6 | sets = 1_000_000 7 | rems = 100_000 8 | gets = 1_000_000 9 | writers = 1 10 | readers = 16 11 | -------------------------------------------------------------------------------- /src/bin/rdms/perf-profiles/default-robt.toml: -------------------------------------------------------------------------------- 1 | key_type = "u64" 2 | value_type = "u64" 3 | key_size = 16 4 | value_size = 16 5 | bitmap = "nobitmap" 6 | 7 | [initial] 8 | sets = 1_000_000 9 | ins = 1_000_000 10 | rems = 100_000 11 | dels = 0 12 | 13 | [initial.robt] 14 | name = "rdms-robt-perf" 15 | dir = "/tmp/rdms-perf" 16 | z_blocksize = 4096 17 | m_blocksize = 4096 18 | v_blocksize = 4096 19 | delta_ok = true 20 | value_in_vlog = true 21 | flush_queue_size = 64 22 | 
23 | [[incrs]] 24 | name = "rdms-robt-perf-incr1" 25 | sets = 1_000_000 26 | ins = 1_000_000 27 | rems = 100_000 28 | dels = 0 29 | compact = true 30 | compact_name = "rdms-robt-perf-compact1" 31 | 32 | [load] 33 | gets = 1_000_000 34 | get_versions = 0 35 | iter = true 36 | iter_versions = false 37 | reverse = true 38 | reverse_versions = false 39 | readers = 1 40 | validate = true 41 | -------------------------------------------------------------------------------- /src/bin/rdms/perf-profiles/default-wral.toml: -------------------------------------------------------------------------------- 1 | name = "wral-perf" 2 | ops = 1_000_000 3 | payload = 32 4 | threads = 8 5 | journal_limit = 100_000_000 6 | nosync = true 7 | -------------------------------------------------------------------------------- /src/bitmaps/croaring.rs: -------------------------------------------------------------------------------- 1 | //! Module `croaring` implement the [Bloom] trait for [roaring bitmap][roaring-bitmap]. 2 | //! 3 | //! [Bloom]: crate::dbs::Bloom 4 | //! [roaring-bitmap]: https://roaringbitmap.org 5 | 6 | use croaring::bitmap::Bitmap; 7 | 8 | use std::{ 9 | convert::TryInto, 10 | hash::{Hash, Hasher}, 11 | }; 12 | 13 | use crate::{dbs::Bloom, hash::CityHasher, Error, Result}; 14 | 15 | // TODO: right now we are using cityhash-rs, make hasher generic. 
16 | 17 | pub struct CRoaring { 18 | bitmap: Bitmap, 19 | } 20 | 21 | impl Default for CRoaring { 22 | fn default() -> CRoaring { 23 | CRoaring::new() 24 | } 25 | } 26 | 27 | impl CRoaring { 28 | pub fn new() -> CRoaring { 29 | CRoaring { bitmap: Bitmap::create() } 30 | } 31 | } 32 | 33 | impl Bloom for CRoaring { 34 | #[inline] 35 | fn len(&self) -> Result { 36 | err_at!(FailConvert, self.bitmap.cardinality().try_into()) 37 | } 38 | 39 | #[inline] 40 | fn add_key(&mut self, element: &Q) { 41 | let mut hasher = CityHasher::default(); 42 | element.hash(&mut hasher); 43 | let code: u64 = hasher.finish(); 44 | let digest = (((code >> 32) ^ code) & 0xFFFFFFFF) as u32; 45 | 46 | self.add_digest32(digest); 47 | } 48 | 49 | #[inline] 50 | fn add_keys(&mut self, keys: &[Q]) { 51 | for key in keys.iter() { 52 | self.add_key(key) 53 | } 54 | } 55 | 56 | #[inline] 57 | fn add_digest32(&mut self, digest: u32) { 58 | self.bitmap.add(digest) 59 | } 60 | 61 | #[inline] 62 | fn add_digests32(&mut self, digests: &[u32]) { 63 | self.bitmap.add_many(digests) 64 | } 65 | 66 | #[inline] 67 | fn add_digest64(&mut self, digest: u64) { 68 | let digest = ((digest >> 32) ^ (digest & 0xFFFFFFFF)) as u32; 69 | self.bitmap.add(digest) 70 | } 71 | 72 | #[inline] 73 | fn add_digests64(&mut self, digests: &[u64]) { 74 | for digest in digests.iter() { 75 | self.add_digest64(*digest) 76 | } 77 | } 78 | 79 | #[inline] 80 | fn build(&mut self) -> Result<()> { 81 | Ok(()) 82 | } 83 | 84 | #[inline] 85 | fn contains(&self, element: &Q) -> bool { 86 | let mut hasher = CityHasher::default(); 87 | 88 | element.hash(&mut hasher); 89 | let code: u64 = hasher.finish(); 90 | let digest = (((code >> 32) ^ code) & 0xFFFFFFFF) as u32; 91 | 92 | self.bitmap.contains(digest) 93 | } 94 | 95 | #[inline] 96 | fn to_bytes(&self) -> Result> { 97 | Ok(self.bitmap.serialize()) 98 | } 99 | 100 | #[inline] 101 | fn from_bytes(buf: &[u8]) -> Result<(CRoaring, usize)> { 102 | let val = CRoaring { bitmap: 
Bitmap::deserialize(buf) }; 103 | let n = buf.len(); 104 | Ok((val, n)) 105 | } 106 | 107 | #[inline] 108 | fn or(&self, other: &CRoaring) -> Result { 109 | Ok(CRoaring { bitmap: self.bitmap.or(&other.bitmap) }) 110 | } 111 | } 112 | 113 | #[cfg(test)] 114 | #[path = "croaring_test.rs"] 115 | mod croaring_test; 116 | -------------------------------------------------------------------------------- /src/bitmaps/croaring_test.rs: -------------------------------------------------------------------------------- 1 | use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng}; 2 | 3 | use crate::dbs::Bloom; 4 | 5 | use super::*; 6 | 7 | #[test] 8 | fn test_croaring_bitmap() { 9 | let seed: u64 = random(); 10 | let mut rng = StdRng::seed_from_u64(seed); 11 | println!("test_croaring seed:{}", seed); 12 | 13 | let keys: Vec = (0..100_000).map(|_| rng.gen::()).collect(); 14 | 15 | let mut digests = vec![]; 16 | let filter = { 17 | let mut filter = CRoaring::new(); 18 | for key in keys.iter() { 19 | let digest = { 20 | let mut hasher = CityHasher::default(); 21 | key.hash(&mut hasher); 22 | let code: u64 = hasher.finish(); 23 | (((code >> 32) ^ code) & 0xFFFFFFFF) as u32 24 | }; 25 | digests.push(digest); 26 | 27 | filter.add_key(&key); 28 | } 29 | filter.build().expect("fail building croaring filter"); 30 | filter 31 | }; 32 | digests.sort_unstable(); 33 | digests.dedup(); 34 | println!("digests {}", digests.len()); 35 | 36 | assert_eq!(filter.len(), Ok(digests.len()), "{:?} {}", filter.len(), keys.len()); 37 | 38 | for key in keys.iter() { 39 | assert!(filter.contains(key), "key {} not present", key); 40 | } 41 | 42 | let filter = { 43 | let val = filter.to_bytes().unwrap(); 44 | let (filter, n) = CRoaring::from_bytes(&val).unwrap(); 45 | assert_eq!(n, val.len(), "{} {}", n, val.len()); 46 | filter 47 | }; 48 | for key in keys.iter() { 49 | assert!(filter.contains(key), "key {} not present", key); 50 | } 51 | } 52 | 
-------------------------------------------------------------------------------- /src/bitmaps/fuse16.rs: -------------------------------------------------------------------------------- 1 | use cbordata::{Cbor, FromCbor, IntoCbor}; 2 | use xorfilter::Fuse16; 3 | 4 | use std::hash::{BuildHasher, Hash}; 5 | 6 | use crate::{dbs, Error, Result}; 7 | 8 | impl dbs::Bloom for Fuse16 9 | where 10 | H: Clone + BuildHasher + From> + Into>, 11 | { 12 | fn len(&self) -> Result { 13 | match self.len() { 14 | Some(n) => Ok(n), 15 | None => err_at!(NotImplemented, msg: "Fuse16 does not implement length"), 16 | } 17 | } 18 | 19 | fn add_key(&mut self, key: &Q) { 20 | self.insert(key) 21 | } 22 | 23 | fn add_keys(&mut self, keys: &[Q]) { 24 | self.populate(keys) 25 | } 26 | 27 | fn add_digest32(&mut self, digest: u32) { 28 | self.populate_keys(&[u64::from(digest)]) 29 | } 30 | 31 | fn add_digests32(&mut self, digests: &[u32]) { 32 | let digests: Vec = digests.iter().map(|x| u64::from(*x)).collect(); 33 | self.populate_keys(&digests) 34 | } 35 | 36 | fn add_digest64(&mut self, digest: u64) { 37 | self.populate_keys(&[digest]) 38 | } 39 | 40 | fn add_digests64(&mut self, digests: &[u64]) { 41 | self.populate_keys(digests) 42 | } 43 | 44 | fn build(&mut self) -> Result<()> { 45 | err_at!(Fatal, self.build()) 46 | } 47 | 48 | fn contains(&self, element: &Q) -> bool { 49 | self.contains(element) 50 | } 51 | 52 | fn to_bytes(&self) -> Result> { 53 | let cbor_val = err_at!(FailCbor, self.clone().into_cbor())?; 54 | let mut buf: Vec = vec![]; 55 | err_at!(FailCbor, cbor_val.encode(&mut buf))?; 56 | 57 | Ok(buf) 58 | } 59 | 60 | fn from_bytes(mut buf: &[u8]) -> Result<(Self, usize)> { 61 | let (cbor_val, n) = err_at!(IOError, Cbor::decode(&mut buf))?; 62 | Ok((err_at!(FailCbor, Fuse16::::from_cbor(cbor_val))?, n)) 63 | } 64 | 65 | fn or(&self, _other: &Self) -> Result { 66 | err_at!(NotImplemented, msg: "Fuse16 does not implement or() method") 67 | } 68 | } 69 | 70 | #[cfg(test)] 71 | 
#[path = "fuse16_test.rs"]
mod fuse16_test;

// ----------------------------------------------------------------------------
// src/bitmaps/fuse16_test.rs
// ----------------------------------------------------------------------------

use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng};
use xorfilter::BuildHasherDefault;

use crate::dbs::Bloom;

use super::*;

#[test]
fn test_fuse16_bitmap() {
    let seed: u64 = random();
    let mut rng = StdRng::seed_from_u64(seed);
    println!("test_fuse16 seed:{}", seed);

    let keys: Vec<u64> = (0..100_000).map(|_| rng.gen::<u64>()).collect();

    let filter = {
        let mut filter = Fuse16::<BuildHasherDefault>::new(keys.len() as u32);
        for key in keys.clone().into_iter() {
            filter.add_key(&key);
        }
        filter.build().expect("fail building fuse16 filter");
        filter
    };

    for key in keys.iter() {
        assert!(filter.contains(key), "key {} not present", key);
    }

    // Round-trip through bytes and verify membership is preserved.
    let filter = {
        let val = filter.to_bytes().unwrap();
        let (filter, n) = Fuse16::<BuildHasherDefault>::from_bytes(&val).unwrap();
        assert_eq!(n, val.len(), "{} {}", n, val.len());
        filter
    };
    for key in keys.iter() {
        assert!(filter.contains(key), "key {} not present", key);
    }
    assert_eq!(filter.len(), Some(keys.len()), "{:?} {}", filter.len(), keys.len());
}

// ----------------------------------------------------------------------------
// src/bitmaps/fuse8.rs
// ----------------------------------------------------------------------------

use cbordata::{Cbor, FromCbor, IntoCbor};
use xorfilter::Fuse8;

use std::hash::{BuildHasher, Hash};

use crate::{dbs, Error, Result};

/// Adapt xorfilter's [Fuse8] to the crate's [dbs::Bloom] contract; bytes are
/// round-tripped through CBOR.
impl<H> dbs::Bloom for Fuse8<H>
where
    H: Clone + BuildHasher + From<Vec<u8>> + Into<Vec<u8>>,
{
    fn len(&self) -> Result<usize> {
        match self.len() {
            Some(n) => Ok(n),
            None => err_at!(NotImplemented, msg: "Fuse8 does not implement length"),
        }
    }

    #[inline]
    fn add_key<Q: ?Sized + Hash>(&mut self, key: &Q) {
        self.insert(key)
    }

    #[inline]
    fn add_keys<Q: Hash>(&mut self, keys: &[Q]) {
        self.populate(keys)
    }

    #[inline]
    fn add_digest32(&mut self, digest: u32) {
        self.populate_keys(&[u64::from(digest)])
    }

    #[inline]
    fn add_digests32(&mut self, digests: &[u32]) {
        let digests: Vec<u64> = digests.iter().map(|x| u64::from(*x)).collect();
        self.populate_keys(&digests)
    }

    #[inline]
    fn add_digest64(&mut self, digest: u64) {
        self.populate_keys(&[digest])
    }

    #[inline]
    fn add_digests64(&mut self, digests: &[u64]) {
        self.populate_keys(digests)
    }

    fn build(&mut self) -> Result<()> {
        err_at!(Fatal, self.build())
    }

    fn contains<Q: ?Sized + Hash>(&self, element: &Q) -> bool {
        self.contains(element)
    }

    fn to_bytes(&self) -> Result<Vec<u8>> {
        let cbor_val = err_at!(FailCbor, self.clone().into_cbor())?;
        let mut buf: Vec<u8> = vec![];
        err_at!(FailCbor, cbor_val.encode(&mut buf))?;

        Ok(buf)
    }

    fn from_bytes(mut buf: &[u8]) -> Result<(Self, usize)> {
        let (cbor_val, n) = err_at!(IOError, Cbor::decode(&mut buf))?;
        Ok((err_at!(FailCbor, Fuse8::<H>::from_cbor(cbor_val))?, n))
    }

    fn or(&self, _other: &Self) -> Result<Self> {
        err_at!(NotImplemented, msg: "Fuse8 does not implement or() method")
    }
}

#[cfg(test)]
#[path = "fuse8_test.rs"]
mod fuse8_test;

// ----------------------------------------------------------------------------
// src/bitmaps/fuse8_test.rs
// ----------------------------------------------------------------------------

use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng};
use xorfilter::BuildHasherDefault;

use crate::dbs::Bloom;

use super::*;

#[test]
fn test_fuse8_bitmap() {
    let seed: u64 = random();
    let mut rng = StdRng::seed_from_u64(seed);
println!("test_fuse8 seed:{}", seed); 13 | 14 | let keys: Vec = (0..100_000).map(|_| rng.gen::()).collect(); 15 | 16 | let filter = { 17 | let mut filter = Fuse8::::new(keys.len() as u32); 18 | for key in keys.clone().into_iter() { 19 | filter.add_key(&key); 20 | } 21 | filter.build().expect("fail building fuse8 filter"); 22 | filter 23 | }; 24 | 25 | for key in keys.iter() { 26 | assert!(filter.contains(key), "key {} not present", key); 27 | } 28 | 29 | let filter = { 30 | let val = filter.to_bytes().unwrap(); 31 | let (filter, n) = Fuse8::::from_bytes(&val).unwrap(); 32 | assert_eq!(n, val.len(), "{} {}", n, val.len()); 33 | filter 34 | }; 35 | for key in keys.iter() { 36 | assert!(filter.contains(key), "key {} not present", key); 37 | } 38 | assert_eq!(filter.len(), Some(keys.len()), "{:?} {}", filter.len(), keys.len()); 39 | } 40 | -------------------------------------------------------------------------------- /src/bitmaps/mod.rs: -------------------------------------------------------------------------------- 1 | //! Module implement adaptors for popular bitmap filters. 2 | 3 | mod croaring; 4 | mod fuse16; 5 | mod fuse8; 6 | mod nobitmap; 7 | mod xor8; 8 | 9 | pub use self::croaring::CRoaring; 10 | pub use nobitmap::NoBitmap; 11 | // Re-imported from xorfilter package. 12 | pub use xorfilter::Fuse16; 13 | // Re-imported from xorfilter package. 14 | pub use xorfilter::Fuse8; 15 | // Re-imported from xorfilter package. 16 | pub use xorfilter::Xor8; 17 | -------------------------------------------------------------------------------- /src/bitmaps/nobitmap.rs: -------------------------------------------------------------------------------- 1 | //! Module `nobitmap` define a dummy bitmap index. 
2 | 3 | use std::hash::Hash; 4 | 5 | use crate::{dbs, Result}; 6 | 7 | #[derive(Default, Clone)] 8 | pub struct NoBitmap; 9 | 10 | impl dbs::Bloom for NoBitmap { 11 | #[inline] 12 | fn len(&self) -> Result { 13 | Ok(0) 14 | } 15 | 16 | #[inline] 17 | fn add_key(&mut self, _key: &Q) { 18 | // Do nothing 19 | } 20 | 21 | #[inline] 22 | fn add_keys(&mut self, _keys: &[Q]) { 23 | // Do nothing 24 | } 25 | 26 | #[inline] 27 | fn add_digest32(&mut self, _digest: u32) { 28 | // Do nothing 29 | } 30 | 31 | #[inline] 32 | fn add_digests32(&mut self, _digests: &[u32]) { 33 | // Do nothing 34 | } 35 | 36 | #[inline] 37 | fn add_digest64(&mut self, _digest: u64) { 38 | // Do nothing 39 | } 40 | 41 | #[inline] 42 | fn add_digests64(&mut self, _digests: &[u64]) { 43 | // Do nothing 44 | } 45 | 46 | #[inline] 47 | fn build(&mut self) -> Result<()> { 48 | Ok(()) 49 | } 50 | 51 | #[inline] 52 | fn contains(&self, _element: &Q) -> bool { 53 | true // false positives are okay. 54 | } 55 | 56 | #[inline] 57 | fn to_bytes(&self) -> Result> { 58 | Ok(vec![]) 59 | } 60 | 61 | #[inline] 62 | fn from_bytes(_buf: &[u8]) -> Result<(NoBitmap, usize)> { 63 | Ok((NoBitmap, 0)) 64 | } 65 | 66 | #[inline] 67 | fn or(&self, _other: &NoBitmap) -> Result { 68 | Ok(NoBitmap) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/bitmaps/xor8.rs: -------------------------------------------------------------------------------- 1 | use cbordata::{Cbor, FromCbor, IntoCbor}; 2 | use xorfilter::Xor8; 3 | 4 | use std::hash::{BuildHasher, Hash}; 5 | 6 | use crate::{dbs, Error, Result}; 7 | 8 | impl dbs::Bloom for Xor8 9 | where 10 | H: Clone + BuildHasher + From> + Into>, 11 | { 12 | fn len(&self) -> Result { 13 | match self.len() { 14 | Some(n) => Ok(n), 15 | None => err_at!(NotImplemented, msg: "Xor8 does not implement length"), 16 | } 17 | } 18 | 19 | #[inline] 20 | fn add_key(&mut self, key: &Q) { 21 | self.insert(key) 22 | } 23 | 24 | #[inline] 25 | fn 
add_keys(&mut self, keys: &[Q]) { 26 | self.populate(keys) 27 | } 28 | 29 | #[inline] 30 | fn add_digest32(&mut self, digest: u32) { 31 | self.populate_keys(&[u64::from(digest)]) 32 | } 33 | 34 | #[inline] 35 | fn add_digests32(&mut self, digests: &[u32]) { 36 | let digests: Vec = digests.iter().map(|x| u64::from(*x)).collect(); 37 | self.populate_keys(&digests) 38 | } 39 | 40 | #[inline] 41 | fn add_digest64(&mut self, digest: u64) { 42 | self.populate_keys(&[digest]) 43 | } 44 | 45 | #[inline] 46 | fn add_digests64(&mut self, digests: &[u64]) { 47 | self.populate_keys(digests) 48 | } 49 | 50 | #[inline] 51 | fn build(&mut self) -> Result<()> { 52 | err_at!(Fatal, self.build()) 53 | } 54 | 55 | #[inline] 56 | fn contains(&self, element: &Q) -> bool { 57 | self.contains(element) 58 | } 59 | 60 | fn to_bytes(&self) -> Result> { 61 | let cbor_val = err_at!(FailCbor, self.clone().into_cbor())?; 62 | let mut buf: Vec = vec![]; 63 | err_at!(FailCbor, cbor_val.encode(&mut buf))?; 64 | 65 | Ok(buf) 66 | } 67 | 68 | fn from_bytes(mut buf: &[u8]) -> Result<(Self, usize)> { 69 | let (cbor_val, n) = err_at!(IOError, Cbor::decode(&mut buf))?; 70 | Ok((err_at!(FailCbor, Xor8::::from_cbor(cbor_val))?, n)) 71 | } 72 | 73 | fn or(&self, _other: &Self) -> Result { 74 | err_at!(NotImplemented, msg: "xor8 does not implement or() method") 75 | } 76 | } 77 | 78 | #[cfg(test)] 79 | #[path = "xor8_test.rs"] 80 | mod xor8_test; 81 | -------------------------------------------------------------------------------- /src/bitmaps/xor8_test.rs: -------------------------------------------------------------------------------- 1 | use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng}; 2 | use xorfilter::BuildHasherDefault; 3 | 4 | use crate::dbs::Bloom; 5 | 6 | use super::*; 7 | 8 | #[test] 9 | fn test_xor8_bitmap() { 10 | let seed: u64 = random(); 11 | let mut rng = StdRng::seed_from_u64(seed); 12 | println!("test_xor8 seed:{}", seed); 13 | 14 | let keys: Vec = (0..100_000).map(|_| 
rng.gen::()).collect(); 15 | 16 | let filter = { 17 | let mut filter = Xor8::::new(); 18 | for key in keys.clone().into_iter() { 19 | filter.add_key(&key); 20 | } 21 | filter.build().expect("fail building xor8 filter"); 22 | filter 23 | }; 24 | 25 | for key in keys.iter() { 26 | assert!(filter.contains(key), "key {} not present", key); 27 | } 28 | 29 | let filter = { 30 | let val = ::to_bytes(&filter).unwrap(); 31 | let (filter, n) = 32 | as dbs::Bloom>::from_bytes(&val).unwrap(); 33 | assert_eq!(n, val.len(), "{} {}", n, val.len()); 34 | filter 35 | }; 36 | for key in keys.iter() { 37 | assert!(filter.contains(key), "key {} not present", key); 38 | } 39 | assert_eq!(filter.len(), Some(keys.len()), "{:?} {}", filter.len(), keys.len()); 40 | } 41 | -------------------------------------------------------------------------------- /src/clru/README.md: -------------------------------------------------------------------------------- 1 | Cache: Least Recently Used Eviction strategy 2 | -------------------------------------------------------------------------------- /src/clru/mod.rs: -------------------------------------------------------------------------------- 1 | //! Module implement concurrent LRU cache. 2 | 3 | use std::{fmt, sync::atomic::AtomicPtr}; 4 | 5 | use crate::{dbs, Result}; 6 | 7 | mod access; 8 | mod evictor; 9 | mod lru; 10 | 11 | use access::Access; 12 | use evictor::Evictor; 13 | pub use lru::{Config, Lru, Stats}; 14 | 15 | // wrap the value parameter. 
16 | pub struct Value 17 | where 18 | K: fmt::Debug, 19 | { 20 | value: V, 21 | access: AtomicPtr>, 22 | } 23 | 24 | impl dbs::Footprint for Value 25 | where 26 | K: fmt::Debug, 27 | V: dbs::Footprint, 28 | { 29 | fn footprint(&self) -> Result { 30 | let mut size = std::mem::size_of_val(self) as isize; 31 | size += self.value.footprint()?; 32 | Ok(size) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/dba/mod.rs: -------------------------------------------------------------------------------- 1 | //! Traits and Types, related to core-database for asynchronous distribution of data. 2 | //! 3 | //! _**Content addressing**_ 4 | //! 5 | //! Asynchronous datastores are content addressed, where the contents are called objects 6 | //! and their address is computed using a hash-digest. Hash-digest can also be 7 | //! cryptographically strong. For example, below we compute address for JSON object 8 | //! using SHA1: 9 | //! 10 | //! ```ignore 11 | //! content: "{ "planet": "earth", "size": 6371 }" 12 | //! address: eefb77629ed77802247c30e9462ff8886e9cbcf6 13 | //! ``` 14 | //! 15 | //! _**`Object`**_ 16 | //! 17 | //! The design concept of object is such that they can be wired together using 18 | //! parent-child relationship to represent a tree. Refer to [Object] enumeration type. 19 | //! 20 | //! _**`Oid` a.k.a Object-id**_ 21 | //! 22 | //! Refer to [Oid] enumeration type. It is typically a hash digest value generated on 23 | //! the object's content. 24 | //! 25 | //! 26 | //! _**`AsKey`**_ 27 | //! 28 | //! In addition to accessing the DBA stores using content-addressing, it is also possible 29 | //! to access them using object-keys. Typical example is accessing files in a file system. 30 | //! While each file can be considered as object, path to reach the file can be considered 31 | //! as its key. To make this idea explicit, types that are to be used as keys to access 32 | //! 
a DBA store _shall_ implement the [AsKey] trait. 33 | //! 34 | //! `NOTE`: Key, in a DBA store, is _not part of the object_. Note that the other way 35 | //! to access a DBA store is using content-addressing, that is, using the object's digest 36 | //! as its key. 37 | 38 | use crate::Result; 39 | 40 | mod entry; 41 | mod git; 42 | mod types; 43 | 44 | pub use entry::{Edge, Entry, Object, Oid, Type, User}; 45 | pub use types::Str; 46 | 47 | /// AsKey trait can be implemented by any type, that can then be used as key to 48 | /// access `dba` datastores. 49 | pub trait AsKey { 50 | /// Convert type into list of strings. Semantically, each element in the list 51 | /// can be treated as a children to the previous element and the first element 52 | /// is a child of a ROOT. This can also be viewed as file-system path. 53 | fn to_key_path(&self) -> Result>; 54 | } 55 | -------------------------------------------------------------------------------- /src/dba/types.rs: -------------------------------------------------------------------------------- 1 | //! Module implement [dba::AsKey] trait for [String], [path::Path] [path::PathBuf]. 2 | //! 3 | //! And custom types that are handy to use as DBA store keys. `Str`. 4 | 5 | use std::path; 6 | 7 | use crate::{dba, Error, Result}; 8 | 9 | /// Type is a convenience type to wrap any ordinary string into path components. 10 | /// 11 | /// EG: a string like `hello-world` shall be converted to ["h", "e", "l", "hello-word"] 12 | /// when a depth of `3` is used. For a depth `4`, it shall be converted to 13 | /// ["h", "e", "l", "l", "hello-word"]. Use [dba::AsKey] trait for conversion. 14 | pub struct Str { 15 | key: String, 16 | depth: usize, 17 | } 18 | 19 | impl Str { 20 | /// Default depth to convert any string into path components. 
21 | pub const DEFAULT_DEPTH: usize = 3; 22 | } 23 | 24 | impl<'a> From<(&'a str, usize)> for Str { 25 | fn from((key, depth): (&str, usize)) -> Str { 26 | Str { key: key.to_string(), depth } 27 | } 28 | } 29 | 30 | impl<'a> From<&'a str> for Str { 31 | fn from(key: &str) -> Str { 32 | Str { key: key.to_string(), depth: Str::DEFAULT_DEPTH } 33 | } 34 | } 35 | 36 | impl<'a> From for Str { 37 | fn from(key: String) -> Str { 38 | Str { key, depth: Str::DEFAULT_DEPTH } 39 | } 40 | } 41 | 42 | impl dba::AsKey for Str { 43 | fn to_key_path(&self) -> Result> { 44 | let parts = match self.key.len() { 45 | 0 => vec![], 46 | _ => { 47 | let mut parts: Vec = 48 | self.key.chars().take(self.depth).map(|ch| ch.to_string()).collect(); 49 | 50 | parts.push(self.key.to_string()); 51 | parts 52 | } 53 | }; 54 | 55 | Ok(parts) 56 | } 57 | } 58 | 59 | impl dba::AsKey for String { 60 | fn to_key_path(&self) -> Result> { 61 | Ok(self.split('/').map(ToString::to_string).collect()) 62 | } 63 | } 64 | 65 | impl dba::AsKey for path::Path { 66 | fn to_key_path(&self) -> Result> { 67 | let mut items = vec![]; 68 | for c in self.components() { 69 | match c { 70 | path::Component::Normal(c) => match c.to_str() { 71 | Some(c) => items.push(c.to_string()), 72 | None => err_at!(InvalidInput, msg: "key {:?} is invalid", self)?, 73 | }, 74 | _ => err_at!(InvalidInput, msg: "key {:?} is invalid", self)?, 75 | } 76 | } 77 | Ok(items) 78 | } 79 | } 80 | 81 | impl dba::AsKey for path::PathBuf { 82 | fn to_key_path(&self) -> Result> { 83 | let mut items = vec![]; 84 | for c in self.components() { 85 | match c { 86 | path::Component::Normal(c) => match c.to_str() { 87 | Some(c) => items.push(c.to_string()), 88 | None => err_at!(InvalidInput, msg: "key {:?} is invalid", self)?, 89 | }, 90 | _ => err_at!(InvalidInput, msg: "key {:?} is invalid", self)?, 91 | } 92 | } 93 | Ok(items) 94 | } 95 | } 96 | -------------------------------------------------------------------------------- /src/dbs/binary.rs: 
-------------------------------------------------------------------------------- 1 | use cbordata::Cborize; 2 | 3 | use std::{convert::TryFrom, hash::Hash}; 4 | 5 | use crate::{dbs, Error, Result}; 6 | 7 | const BINARY_VER: u32 = 0x00170001_u32; 8 | 9 | #[derive(Clone, Default, Debug, PartialEq, PartialOrd, Eq, Ord, Hash, Cborize)] 10 | pub struct Binary { 11 | pub val: Vec, 12 | } 13 | 14 | impl Binary { 15 | const ID: u32 = BINARY_VER; 16 | } 17 | 18 | impl ToString for Binary { 19 | fn to_string(&self) -> String { 20 | std::str::from_utf8(&self.val).unwrap().to_string() 21 | } 22 | } 23 | 24 | impl dbs::Diff for Binary { 25 | type Delta = Self; 26 | 27 | fn diff(&self, old: &Self) -> Self::Delta { 28 | Binary { val: old.val.to_vec() } 29 | } 30 | 31 | fn merge(&self, delta: &Self::Delta) -> Self { 32 | Binary { val: delta.val.to_vec() } 33 | } 34 | } 35 | 36 | impl dbs::Footprint for Binary { 37 | fn footprint(&self) -> Result { 38 | use std::mem::size_of; 39 | let size = size_of::() + self.val.capacity(); 40 | err_at!(FailConvert, isize::try_from(size)) 41 | } 42 | } 43 | 44 | impl<'a> arbitrary::Arbitrary<'a> for Binary { 45 | fn arbitrary(u: &mut arbitrary::Unstructured) -> arbitrary::Result { 46 | let size = u.arbitrary::()? % 1024; 47 | let val = match u.arbitrary::()? 
{ 48 | 0 => Binary::default(), 49 | val => Binary { 50 | val: format!("{:0width$}", val, width = size).as_bytes().to_vec(), 51 | }, 52 | }; 53 | Ok(val) 54 | } 55 | } 56 | 57 | impl rand::distributions::Distribution for rand::distributions::Standard { 58 | fn sample(&self, rng: &mut R) -> Binary { 59 | let (val, size) = (rng.gen::(), rng.gen::() % 1024); 60 | Binary { 61 | val: format!("{:0width$}", val, width = size).as_bytes().to_vec(), 62 | } 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /src/dbs/delta.rs: -------------------------------------------------------------------------------- 1 | use cbordata::Cborize; 2 | 3 | use crate::{dbs::Footprint, Error, Result}; 4 | 5 | const DELTA_VER: u32 = 0x00030001; 6 | 7 | /// Delta type, describe the older-versions of an indexed entry. 8 | #[derive(Clone, Debug, Eq, PartialEq, Cborize)] 9 | pub enum Delta { 10 | U { delta: D, seqno: u64 }, 11 | D { seqno: u64 }, 12 | } 13 | 14 | impl Footprint for Delta 15 | where 16 | D: Footprint, 17 | { 18 | /// Return the previous versions of this entry as Deltas. 19 | fn footprint(&self) -> Result { 20 | use std::{convert::TryFrom, mem::size_of}; 21 | 22 | let mut size = err_at!(FailConvert, isize::try_from(size_of::>()))?; 23 | size -= err_at!(FailConvert, isize::try_from(size_of::()))?; 24 | 25 | size += match self { 26 | Delta::U { delta, .. } => delta.footprint()?, 27 | Delta::D { .. } => 0, 28 | }; 29 | 30 | Ok(size) 31 | } 32 | } 33 | 34 | impl Delta { 35 | pub const ID: u32 = DELTA_VER; 36 | 37 | #[inline] 38 | pub fn new_upsert(delta: D, seqno: u64) -> Delta { 39 | Delta::U { delta, seqno } 40 | } 41 | 42 | #[inline] 43 | pub fn new_delete(seqno: u64) -> Delta { 44 | Delta::D { seqno } 45 | } 46 | } 47 | 48 | impl Delta { 49 | #[inline] 50 | pub fn to_seqno(&self) -> u64 { 51 | match self { 52 | Delta::U { seqno, .. 
} => *seqno, 53 | Delta::D { seqno } => *seqno, 54 | } 55 | } 56 | 57 | #[inline] 58 | pub fn to_delta(&self) -> Option 59 | where 60 | D: Clone, 61 | { 62 | match self { 63 | Delta::U { delta, .. } => Some(delta.clone()), 64 | Delta::D { .. } => None, 65 | } 66 | } 67 | 68 | #[inline] 69 | pub fn unpack(&self) -> (u64, Option) 70 | where 71 | D: Clone, 72 | { 73 | match self { 74 | Delta::U { delta, seqno } => (*seqno, Some(delta.clone())), 75 | Delta::D { seqno } => (*seqno, None), 76 | } 77 | } 78 | } 79 | 80 | #[cfg(test)] 81 | #[path = "delta_test.rs"] 82 | mod delta_test; 83 | -------------------------------------------------------------------------------- /src/dbs/delta_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[test] 4 | fn test_delta_new() { 5 | let delta = Delta::new_upsert(0x1234_u64, 1); 6 | assert_eq!(delta.to_seqno(), 1); 7 | assert_eq!(delta.to_delta(), Some(0x1234)); 8 | assert_eq!(delta.unpack(), (1, Some(0x1234))); 9 | 10 | let delta: Delta = Delta::new_delete(2); 11 | assert_eq!(delta.to_seqno(), 2); 12 | assert_eq!(delta.to_delta(), None); 13 | assert_eq!(delta.unpack(), (2, None)); 14 | } 15 | 16 | #[test] 17 | fn test_delta_footprint() { 18 | let delta = Delta::new_upsert(0x1234_u64, 2); 19 | assert_eq!(delta.footprint().unwrap(), 24); 20 | let delta = Delta::new_upsert(vec![0x1234_u64], 2); 21 | assert_eq!(delta.footprint().unwrap(), 49, "{}", delta.footprint().unwrap()); 22 | let delta = Delta::new_upsert(vec!["hello world".to_string()], 2); 23 | assert_eq!(delta.footprint().unwrap(), 76, "{}", delta.footprint().unwrap()); 24 | 25 | let delta: Delta = Delta::new_delete(2); 26 | assert_eq!(delta.footprint().unwrap(), 16); 27 | } 28 | -------------------------------------------------------------------------------- /src/dbs/diff.rs: -------------------------------------------------------------------------------- 1 | //! 
Module define [Diff] trait and implement the trait for native rust types. 2 | //! 3 | //! [NoDiff] can be used for implementing [Diff] trait on user-defined value types 4 | //! Refer [NoDiff] for detail. 5 | 6 | use cbordata::Cborize; 7 | 8 | // TODO: give a new type number for high 16-bits. 9 | const NDIFF_VER: u32 = 0x00070001; 10 | 11 | /// Trait for diff-able values. 12 | /// 13 | /// Version control is a necessary feature for non-destructive writes. 14 | /// Using this trait it is possible to generate concise older versions as 15 | /// deltas. Note that this version control follows centralized behavior, as 16 | /// apposed to distributed behavior, for which we need three-way-merge. 17 | /// 18 | /// If, 19 | /// ```notest 20 | /// P = old value; C = new value; D = difference between P and C 21 | /// ``` 22 | /// 23 | /// Then, 24 | /// ```notest 25 | /// D = C - P (diff operation) 26 | /// P = C - D (merge operation, to get old value) 27 | /// ``` 28 | pub trait Diff: Sized + Clone + From<::Delta> { 29 | type Delta: Clone + From; 30 | 31 | /// Return the delta between two consecutive versions of a value. 32 | /// `Delta = New - Old`. 33 | fn diff(&self, old: &Self) -> Self::Delta; 34 | 35 | /// Merge delta with newer version to return older version of the value. 36 | /// `Old = New - Delta`. 37 | fn merge(&self, delta: &Self::Delta) -> Self; 38 | } 39 | 40 | /// Associated type for value-type that don't implement [Diff] trait, i.e 41 | /// whereever applicable, use NoDiff as delta type. 
42 | #[derive(Clone, Default, Debug, Eq, PartialEq, Cborize)] 43 | pub struct NoDiff; 44 | 45 | impl NoDiff { 46 | pub const ID: u32 = NDIFF_VER; 47 | } 48 | 49 | #[cfg(test)] 50 | #[path = "diff_test.rs"] 51 | mod diff_test; 52 | -------------------------------------------------------------------------------- /src/dbs/diff_test.rs: -------------------------------------------------------------------------------- 1 | use cbordata::{FromCbor, IntoCbor}; 2 | 3 | use super::*; 4 | 5 | #[test] 6 | fn test_nodiff() { 7 | let no_diff = NoDiff; 8 | 9 | let val = no_diff.clone().into_cbor().unwrap(); 10 | assert_eq!(NoDiff::from_cbor(val).unwrap(), no_diff) 11 | } 12 | -------------------------------------------------------------------------------- /src/dbs/types.rs: -------------------------------------------------------------------------------- 1 | //! Implement Diff, Footprint traits for native types and std-types. 2 | 3 | use std::convert::TryFrom; 4 | 5 | use crate::{ 6 | dbs::{Diff, Footprint}, 7 | Error, Result, 8 | }; 9 | 10 | macro_rules! impl_diff_basic_types { 11 | ($($type:ty),*) => ( 12 | $( 13 | impl Diff for $type { 14 | type Delta = $type; 15 | 16 | fn diff(&self, old: &$type) -> Self::Delta { 17 | *old 18 | } 19 | 20 | fn merge(&self, delta: &Self::Delta) -> Self { 21 | *delta 22 | } 23 | } 24 | )* 25 | ); 26 | } 27 | 28 | // TODO: implement Diff for all Rust native types - char, f32, f64, and others 29 | impl_diff_basic_types![ 30 | bool, i8, i16, i32, i64, i128, isize, u8, u16, u32, u64, u128, usize 31 | ]; 32 | 33 | macro_rules! 
impl_footprint_basic_types { 34 | ($($type:ty),*) => ( 35 | $( 36 | impl Footprint for $type { 37 | fn footprint(&self) -> Result { 38 | use std::mem::size_of; 39 | err_at!(FailConvert, isize::try_from(size_of::<$type>())) 40 | } 41 | } 42 | )* 43 | ); 44 | } 45 | 46 | impl_footprint_basic_types![ 47 | bool, i8, i16, i32, i64, i128, isize, u8, u16, u32, u64, u128, usize, f32, f64, char 48 | ]; 49 | 50 | impl Footprint for Vec 51 | where 52 | T: Footprint, 53 | { 54 | fn footprint(&self) -> Result { 55 | use std::mem::size_of; 56 | 57 | let mut size = 58 | err_at!(FailConvert, isize::try_from(size_of::>() + self.capacity()))?; 59 | 60 | for item in self.iter() { 61 | size += item.footprint()? 62 | } 63 | 64 | Ok(size) 65 | } 66 | } 67 | 68 | impl Footprint for String { 69 | fn footprint(&self) -> Result { 70 | use std::mem::size_of; 71 | Ok(err_at!(FailConvert, isize::try_from(size_of::() + self.capacity()))?) 72 | } 73 | } 74 | 75 | #[cfg(test)] 76 | #[path = "types_test.rs"] 77 | mod types_test; 78 | -------------------------------------------------------------------------------- /src/dbs/types_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | macro_rules! test_diff_basic_types { 4 | ($(($type:ident, $name:ident)),*) => ( 5 | $( 6 | #[test] 7 | fn $name() { 8 | let ver1: $type = rand::random(); 9 | let ver2: $type = rand::random(); 10 | 11 | assert_eq!(ver2.diff(&ver1), ver1); 12 | assert_eq!(ver2.merge(&ver1), ver1); 13 | } 14 | )* 15 | ); 16 | } 17 | 18 | test_diff_basic_types![ 19 | (bool, test_diff_bool), 20 | (i8, test_diff_i8), 21 | (i16, test_diff_i16), 22 | (i32, test_diff_i32), 23 | (i64, test_diff_i64), 24 | (i128, test_diff_i128), 25 | (isize, test_diff_isize), 26 | (u8, test_diff_u8), 27 | (u16, test_diff_u16), 28 | (u32, test_diff_u32), 29 | (u64, test_diff_u64), 30 | (u128, test_diff_u128), 31 | (usize, test_diff_usize) 32 | ]; 33 | 34 | macro_rules! 
test_footprint_basic_types { 35 | ($(($type:ty, $name:ident, $size:expr)),*) => ( 36 | $( 37 | #[test] 38 | fn $name() { 39 | let val: $type = Default::default(); 40 | assert_eq!(val.footprint().unwrap(), $size, stringify!($name)); 41 | } 42 | )* 43 | ); 44 | } 45 | 46 | test_footprint_basic_types![ 47 | (bool, test_footprint_bool, 1), 48 | (i8, test_footprint_i8, 1), 49 | (i16, test_footprint_i16, 2), 50 | (i32, test_footprint_i32, 4), 51 | (i64, test_footprint_i64, 8), 52 | (i128, test_footprint_i128, 16), 53 | (isize, test_footprint_isize, 8), 54 | (u8, test_footprint_u8, 1), 55 | (u16, test_footprint_u16, 2), 56 | (u32, test_footprint_u32, 4), 57 | (u64, test_footprint_u64, 8), 58 | (u128, test_footprint_u128, 16), 59 | (usize, test_footprint_usize, 8), 60 | (f32, test_footprint_f32, 4), 61 | (f64, test_footprint_f64, 8), 62 | (char, test_footprint_char, 4) 63 | ]; 64 | -------------------------------------------------------------------------------- /src/dbs/value.rs: -------------------------------------------------------------------------------- 1 | use cbordata::Cborize; 2 | 3 | use crate::{dbs::Footprint, Error, Result}; 4 | 5 | /// This value must change only when the shape of Value type changes. High 16-bits 6 | /// identify the type and lower 16-bits identify the version. 7 | pub const VALUE_VER: u32 = 0x00020001; 8 | 9 | /// Value type, describe the value part of each entry withing a indexed data-set 10 | #[derive(Clone, Debug, Eq, PartialEq, Cborize)] 11 | pub enum Value { 12 | U { value: V, seqno: u64 }, 13 | D { seqno: u64 }, 14 | } 15 | 16 | impl Footprint for Value 17 | where 18 | V: Footprint, 19 | { 20 | fn footprint(&self) -> Result { 21 | use std::{convert::TryFrom, mem::size_of}; 22 | 23 | let mut size = { 24 | err_at!(FailConvert, isize::try_from(size_of::>()))? 25 | - err_at!(FailConvert, isize::try_from(size_of::()))? 26 | }; 27 | 28 | size += match self { 29 | Value::U { value, .. } => value.footprint()?, 30 | Value::D { .. 
} => 0, 31 | }; 32 | 33 | Ok(size) 34 | } 35 | } 36 | 37 | impl Value { 38 | pub const ID: u32 = VALUE_VER; 39 | 40 | #[inline] 41 | pub fn new_upsert(value: V, seqno: u64) -> Self { 42 | Value::U { value, seqno } 43 | } 44 | 45 | #[inline] 46 | pub fn new_delete(seqno: u64) -> Self { 47 | Value::D { seqno } 48 | } 49 | } 50 | 51 | impl Value { 52 | #[inline] 53 | pub fn to_seqno(&self) -> u64 { 54 | match self { 55 | Value::U { seqno, .. } => *seqno, 56 | Value::D { seqno } => *seqno, 57 | } 58 | } 59 | 60 | #[inline] 61 | pub fn is_deleted(&self) -> bool { 62 | match self { 63 | Value::U { .. } => false, 64 | Value::D { .. } => true, 65 | } 66 | } 67 | 68 | #[inline] 69 | pub fn to_value(&self) -> Option 70 | where 71 | V: Clone, 72 | { 73 | match self { 74 | Value::U { value, .. } => Some(value.clone()), 75 | Value::D { .. } => None, 76 | } 77 | } 78 | 79 | #[inline] 80 | pub fn unpack(&self) -> (u64, Option) 81 | where 82 | V: Clone, 83 | { 84 | match self { 85 | Value::U { value, seqno } => (*seqno, Some(value.clone())), 86 | Value::D { seqno } => (*seqno, None), 87 | } 88 | } 89 | } 90 | 91 | #[cfg(test)] 92 | #[path = "value_test.rs"] 93 | mod value_test; 94 | -------------------------------------------------------------------------------- /src/dbs/value_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[test] 4 | fn test_value_new() { 5 | let value = Value::new_upsert(0x1234_u64, 1); 6 | assert_eq!(value.to_seqno(), 1); 7 | assert_eq!(value.to_value(), Some(0x1234)); 8 | assert_eq!(value.unpack(), (1, Some(0x1234))); 9 | 10 | let value: Value = Value::new_delete(2); 11 | assert_eq!(value.to_seqno(), 2); 12 | assert_eq!(value.to_value(), None); 13 | assert_eq!(value.unpack(), (2, None)); 14 | } 15 | 16 | #[test] 17 | fn test_value_footprint() { 18 | let value = Value::new_upsert(0x1234_u64, 2); 19 | assert_eq!(value.footprint().unwrap(), 24); 20 | let value = 
Value::new_upsert(vec![0x1234_u64], 2); 21 | assert_eq!(value.footprint().unwrap(), 49, "{}", value.footprint().unwrap()); 22 | let value = Value::new_upsert(vec!["hello world".to_string()], 2); 23 | assert_eq!(value.footprint().unwrap(), 76, "{}", value.footprint().unwrap()); 24 | 25 | let value: Value = Value::new_delete(2); 26 | assert_eq!(value.footprint().unwrap(), 16); 27 | } 28 | -------------------------------------------------------------------------------- /src/dbs/wop.rs: -------------------------------------------------------------------------------- 1 | use arbitrary::Arbitrary; 2 | 3 | use crate::dbs; 4 | 5 | /// Write operations allowed on index. 6 | /// 7 | /// * Optional `cas`, when supplied, should match with key's current 8 | /// sequence-number. If key is missing from index, `cas` must be supplied 9 | /// as ZERO. 10 | /// * Optional `seqno`, when supplied, shall be used as mutation's sequence 11 | /// number, ignoring index's monotonically increasing sequence-number. 12 | /// Typically used while replaying operations from external entities like 13 | /// Write-Ahead-Logs. 14 | #[derive(Clone, Arbitrary)] 15 | pub enum Write { 16 | /// Refer to llrb::Index::set. 17 | Set { 18 | key: K, 19 | value: V, 20 | cas: Option, 21 | seqno: Option, 22 | }, 23 | /// Refer to llrb::Index::insert. 24 | Ins { 25 | key: K, 26 | value: V, 27 | cas: Option, 28 | seqno: Option, 29 | }, 30 | /// Refer to llrb::Index::delete. 31 | Del { 32 | key: K, 33 | cas: Option, 34 | seqno: Option, 35 | }, 36 | /// Refer to llrb::Index::remove. 
37 | Rem { 38 | key: K, 39 | cas: Option, 40 | seqno: Option, 41 | }, 42 | } 43 | 44 | impl Write { 45 | #[inline] 46 | pub fn set(key: K, value: V) -> Write { 47 | Write::Set { key, value, cas: None, seqno: None } 48 | } 49 | 50 | #[inline] 51 | pub fn insert(key: K, value: V) -> Write { 52 | Write::Ins { key, value, cas: None, seqno: None } 53 | } 54 | 55 | #[inline] 56 | pub fn remove(key: K) -> Write { 57 | Write::Rem { key, cas: None, seqno: None } 58 | } 59 | 60 | #[inline] 61 | pub fn delete(key: K) -> Write { 62 | Write::Del { key, cas: None, seqno: None } 63 | } 64 | 65 | pub fn set_seqno(self, seqno: u64) -> Write { 66 | use Write::*; 67 | 68 | match self { 69 | Set { key, value, cas, .. } => Set { key, value, cas, seqno: Some(seqno) }, 70 | Ins { key, value, cas, .. } => Ins { key, value, cas, seqno: Some(seqno) }, 71 | Del { key, cas, .. } => Del { key, cas, seqno: Some(seqno) }, 72 | Rem { key, cas, .. } => Rem { key, cas, seqno: Some(seqno) }, 73 | } 74 | } 75 | 76 | pub fn set_cas(self, cas: u64) -> Write { 77 | use Write::*; 78 | 79 | match self { 80 | Set { key, value, seqno, .. } => Set { key, value, seqno, cas: Some(cas) }, 81 | Ins { key, value, seqno, .. } => Ins { key, value, seqno, cas: Some(cas) }, 82 | Del { key, seqno, .. } => Del { key, seqno, cas: Some(cas) }, 83 | Rem { key, seqno, .. } => Rem { key, seqno, cas: Some(cas) }, 84 | } 85 | } 86 | } 87 | 88 | /// Result type for all write operations into index. 89 | pub struct Wr 90 | where 91 | V: dbs::Diff, 92 | { 93 | /// Mutation sequence number for this write-operation. 94 | pub seqno: u64, 95 | pub old_entry: Option>, 96 | } 97 | -------------------------------------------------------------------------------- /src/git/config.rs: -------------------------------------------------------------------------------- 1 | //! Configuration information to open a git repository as key-value index. 
2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | /// Type define permissions to create/access git-repository. Maps to git permissions. 6 | #[derive(Clone, Serialize, Deserialize)] 7 | pub enum Permissions { 8 | #[serde(rename = "shared_umask")] 9 | SharedUmask, 10 | #[serde(rename = "shared_group")] 11 | SharedGroup, 12 | #[serde(rename = "shared_all")] 13 | SharedAll, 14 | } 15 | 16 | /// Type to configure _git-repository_ while creating them and opening them. 17 | #[derive(Clone, Serialize, Deserialize)] 18 | pub struct Config { 19 | /// location of repository root. 20 | pub loc_repo: String, 21 | /// location of database keys, aka file-names, relative to root. 22 | pub loc_db: String, 23 | /// user information to be used in git-commits. 24 | pub user_name: String, 25 | /// user information to be used in git-commits. 26 | pub user_email: String, 27 | /// Refer to [InitConfig] 28 | pub init: InitConfig, 29 | /// Refer to [OpenConfig] 30 | pub open: OpenConfig, 31 | } 32 | 33 | /// Type to configure _git-repository_ while creating them. 34 | #[derive(Clone, Serialize, Deserialize)] 35 | pub struct InitConfig { 36 | /// Create a bare repository with no working directory, 37 | /// ``DEFAULT: false``. 38 | pub bare: Option, 39 | /// Return an error if the repository path appears to already be a git repository, 40 | /// ``DEFAULT: true``. 41 | pub no_reinit: Option, 42 | /// Refer to [Permissions]. 43 | pub permissions: Option, 44 | /// repository/index description. 45 | pub description: String, 46 | } 47 | 48 | /// Type to configure _git-repository_ while opening them. 49 | #[derive(Clone, Serialize, Deserialize)] 50 | pub struct OpenConfig { 51 | /// Only open the specified path; don’t walk upward searching, 52 | /// ``Default: true``. 
53 | pub no_search: Option, 54 | } 55 | -------------------------------------------------------------------------------- /src/git/default.toml: -------------------------------------------------------------------------------- 1 | # Actual key-value, where key is the document location and value (aka document) 2 | # is the file, can be a subdirectory within the repo_path. If empty or missing 3 | # shall default to repo_path. 4 | db_path = "" 5 | 6 | # configuration meant for creating a fresh key-value repository. 7 | [init] 8 | # Can be one of "shared_umask", "shared_group", "shared_all". 9 | permissions = "shared_umask" 10 | # Must be a valid description for this kv store. 11 | description = "" 12 | 13 | [open] 14 | 15 | [clone] 16 | -------------------------------------------------------------------------------- /src/git/mod.rs: -------------------------------------------------------------------------------- 1 | //! Module implement db like interface into git repository. 2 | //! 3 | //! GIT is a candidate for DBA store. In other words, several of GIT ideas are 4 | //! incorporated into the DBA design. But this wrapper around the `libgit2` 5 | //! is minimalistic. While the types and traits defined under [dba] module maps 6 | //! to this wrapper, it is a read-only mapping. That is we use `libgit2` for all 7 | //! heavy lifting, most noteably, write operations into git-store and convert 8 | //! the native `libgit2` types to [dba] types for efficiency and ergonomics. 9 | 10 | use crate::dba; 11 | 12 | /// Default git directory under working git repository. 13 | pub const GIT_DIR: &str = ".git"; 14 | 15 | mod config; 16 | mod index; 17 | pub mod repo; 18 | mod trie; 19 | 20 | pub use config::{Config, InitConfig, OpenConfig, Permissions}; 21 | pub use index::{Index, IterLevel, Txn}; 22 | use trie::{Node, Op, Trie}; 23 | 24 | /// Type abstracts write-access into git storage. 
25 | pub enum WriteOp 26 | where 27 | K: dba::AsKey, 28 | V: AsRef<[u8]>, 29 | { 30 | /// insert leaf component 31 | Ins { key: K, value: V }, 32 | /// Remove leaf component 33 | Rem { key: K }, 34 | } 35 | -------------------------------------------------------------------------------- /src/git/trie.rs: -------------------------------------------------------------------------------- 1 | #[derive(Default)] 2 | pub struct Trie { 3 | root: Node, 4 | } 5 | 6 | impl Trie { 7 | pub fn new() -> Trie { 8 | Trie { root: "--root--".to_string().into() } 9 | } 10 | 11 | pub fn insert(&mut self, comps: &[String], value: &[u8]) { 12 | self.root.insert(comps, value); 13 | } 14 | 15 | pub fn remove(&mut self, comps: &[String]) { 16 | self.root.remove(comps) 17 | } 18 | 19 | pub fn as_root(&self) -> &Node { 20 | &self.root 21 | } 22 | } 23 | 24 | #[derive(Default)] 25 | pub struct Node { 26 | comp: String, 27 | children: Vec, 28 | leafs: Vec, 29 | } 30 | 31 | pub enum Op { 32 | /// insert leaf component 33 | Ins { comp: String, value: Vec }, 34 | /// Remove leaf component 35 | Rem { comp: String }, 36 | } 37 | 38 | impl From for Node { 39 | fn from(comp: String) -> Node { 40 | Node { 41 | comp, 42 | children: Vec::default(), 43 | leafs: Vec::default(), 44 | } 45 | } 46 | } 47 | 48 | impl Node { 49 | pub fn as_comp(&self) -> &str { 50 | &self.comp 51 | } 52 | 53 | pub fn as_children(&self) -> &[Node] { 54 | &self.children 55 | } 56 | 57 | pub fn as_leafs(&self) -> &[Op] { 58 | &self.leafs 59 | } 60 | 61 | pub fn insert(&mut self, comps: &[String], value: &[u8]) { 62 | match comps { 63 | [comp] => { 64 | let res = self.leafs.binary_search_by_key(&comp, |w| match w { 65 | Op::Ins { comp, .. } => comp, 66 | Op::Rem { comp } => comp, 67 | }); 68 | let off = match res { 69 | Ok(off) => off, 70 | Err(off) => off, 71 | }; 72 | let w = Op::Ins { comp: comp.to_string(), value: value.to_vec() }; 73 | self.leafs.insert(off, w); 74 | } 75 | [comp, ..] 
=> { 76 | let res = self.children.binary_search_by_key(&comp, |n| &n.comp); 77 | let off = match res { 78 | Ok(off) => off, 79 | Err(off) => { 80 | self.children.insert(off, comp.clone().into()); 81 | off 82 | } 83 | }; 84 | self.children[off].insert(&comps[1..], value); 85 | } 86 | [] => unreachable!(), 87 | } 88 | } 89 | 90 | fn remove(&mut self, comps: &[String]) { 91 | match comps { 92 | [comp] => { 93 | let res = self.leafs.binary_search_by_key(&comp, |w| match w { 94 | Op::Ins { comp, .. } => comp, 95 | Op::Rem { comp } => comp, 96 | }); 97 | let off = match res { 98 | Ok(off) => off, 99 | Err(off) => off, 100 | }; 101 | let w = Op::Rem { comp: comp.to_string() }; 102 | self.leafs.insert(off, w); 103 | } 104 | [comp, ..] => { 105 | let res = self.children.binary_search_by_key(&comp, |n| &n.comp); 106 | let off = match res { 107 | Ok(off) => off, 108 | Err(off) => { 109 | self.children.insert(off, comp.clone().into()); 110 | off 111 | } 112 | }; 113 | self.children[off].remove(&comps[1..]); 114 | } 115 | [] => unreachable!(), 116 | } 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /src/hash/mod.rs: -------------------------------------------------------------------------------- 1 | use std::hash::{BuildHasher, Hasher}; 2 | 3 | /// Type uses google's city hash to convert [Hash]able key into ``u64``. 4 | /// Refer [cityhash_rs] for details. 
5 | #[derive(Clone, Copy, Default)] 6 | pub struct CityHasher { 7 | digest: u128, 8 | } 9 | 10 | impl CityHasher { 11 | pub fn new() -> CityHasher { 12 | CityHasher::default() 13 | } 14 | } 15 | 16 | impl BuildHasher for CityHasher { 17 | type Hasher = Self; 18 | 19 | #[inline] 20 | fn build_hasher(&self) -> Self { 21 | self.clone() 22 | } 23 | } 24 | 25 | impl Hasher for CityHasher { 26 | fn finish(&self) -> u64 { 27 | ((self.digest >> 64) as u64) ^ ((self.digest & 0xFFFFFFFFFFFFFFFF) as u64) 28 | } 29 | 30 | fn write(&mut self, bytes: &[u8]) { 31 | self.digest = cityhash_rs::cityhash_110_128(bytes); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/llrb/depth_test.rs: -------------------------------------------------------------------------------- 1 | use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng}; 2 | 3 | use super::*; 4 | 5 | #[test] 6 | fn test_llrb_depth() { 7 | let seed: u64 = random(); 8 | println!("test_llrb_depth seed:{}", seed); 9 | let mut rng = StdRng::seed_from_u64(seed); 10 | 11 | let mut depths = [0_usize; 256]; 12 | let (mut val, n_samples) = (Depth::default(), rng.gen::() % 1_000_000); 13 | println!("test_llrb_depth n_samples:{}", n_samples); 14 | for _ in 0..n_samples { 15 | let d = rng.gen::(); 16 | depths[d as usize] += 1; 17 | val.sample(d as usize); 18 | } 19 | 20 | assert_eq!(val.to_samples(), n_samples); 21 | { 22 | let min = depths 23 | .to_vec() 24 | .into_iter() 25 | .enumerate() 26 | .find(|(_, c)| *c != 0) 27 | .map(|x| x.0) 28 | .unwrap_or(usize::MAX); 29 | assert_eq!(val.to_min(), min); 30 | } 31 | { 32 | let max = depths 33 | .to_vec() 34 | .into_iter() 35 | .enumerate() 36 | .rev() 37 | .find(|(_, c)| *c != 0) 38 | .map(|x| x.0) 39 | .unwrap_or(usize::MIN); 40 | assert_eq!(val.to_max(), max); 41 | } 42 | { 43 | let total: usize = depths.iter().enumerate().map(|(d, c)| d * (*c)).sum(); 44 | let count: usize = depths.to_vec().into_iter().sum(); 45 | 
assert_eq!(val.to_mean(), total / count); 46 | } 47 | // TODO: test case for to_percentiles() 48 | } 49 | -------------------------------------------------------------------------------- /src/llrb/mod.rs: -------------------------------------------------------------------------------- 1 | //! Module implement Left leaning red black tree with multi-reader support. 2 | 3 | mod depth; 4 | mod index; 5 | mod node; 6 | mod stats; 7 | 8 | pub use depth::Depth; 9 | pub use index::{Index, Iter, Range, Reverse}; 10 | use node::Node; 11 | pub use stats::Stats; 12 | 13 | #[cfg(any(test, feature = "rdms"))] 14 | pub use index::load_index; 15 | -------------------------------------------------------------------------------- /src/llrb/node.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::{ 4 | dbs::{self, Footprint}, 5 | Error, Result, 6 | }; 7 | 8 | // Node corresponds to a single entry in Llrb instance. 9 | #[derive(Clone)] 10 | pub struct Node 11 | where 12 | V: dbs::Diff, 13 | { 14 | pub entry: Arc>, 15 | pub black: bool, // store: black or red 16 | pub left: Option>>, // store: left child 17 | pub right: Option>>, // store: right child 18 | } 19 | 20 | impl Footprint for Node 21 | where 22 | K: Footprint, 23 | V: dbs::Diff + Footprint, 24 | ::Delta: Footprint, 25 | { 26 | fn footprint(&self) -> Result { 27 | use std::{convert::TryFrom, mem::size_of}; 28 | 29 | let size = size_of::>(); 30 | let overhead = err_at!(FailConvert, isize::try_from(size))?; 31 | Ok(overhead + self.entry.footprint()?) 
32 | } 33 | } 34 | 35 | impl Node 36 | where 37 | V: dbs::Diff, 38 | { 39 | pub fn set(&mut self, value: V, seqno: u64) 40 | where 41 | K: Clone, 42 | { 43 | let mut entry = self.entry.as_ref().clone(); 44 | entry.value = dbs::Value::new_upsert(value, seqno); 45 | entry.deltas = Vec::default(); 46 | self.entry = Arc::new(entry); 47 | } 48 | 49 | pub fn insert(&mut self, value: V, seqno: u64) 50 | where 51 | K: Clone, 52 | { 53 | self.entry = Arc::new(self.entry.as_ref().insert(value, seqno)); 54 | } 55 | 56 | pub fn delete(&mut self, seqno: u64) 57 | where 58 | K: Clone, 59 | { 60 | self.entry = Arc::new(self.entry.as_ref().delete(seqno)); 61 | } 62 | 63 | pub fn commit(&mut self, other: dbs::Entry) -> Result<()> 64 | where 65 | K: PartialEq + Clone, 66 | { 67 | self.entry = Arc::new(self.entry.as_ref().commit(&other)?); 68 | Ok(()) 69 | } 70 | 71 | #[inline] 72 | pub fn set_red(&mut self) { 73 | self.black = false 74 | } 75 | 76 | #[inline] 77 | pub fn set_black(&mut self) { 78 | self.black = true 79 | } 80 | 81 | #[inline] 82 | pub fn toggle_link(&mut self) { 83 | self.black = !self.black 84 | } 85 | } 86 | 87 | impl Node 88 | where 89 | V: dbs::Diff, 90 | { 91 | #[inline] 92 | pub fn as_left_ref(&self) -> Option<&Node> { 93 | self.left.as_deref() 94 | } 95 | 96 | #[inline] 97 | pub fn as_right_ref(&self) -> Option<&Node> { 98 | self.right.as_deref() 99 | } 100 | 101 | #[inline] 102 | pub fn is_black(&self) -> bool { 103 | self.black 104 | } 105 | 106 | #[inline] 107 | pub fn as_key(&self) -> &K { 108 | self.entry.as_key() 109 | } 110 | 111 | #[inline] 112 | pub fn to_seqno(&self) -> u64 { 113 | self.entry.to_seqno() 114 | } 115 | 116 | #[inline] 117 | pub fn is_deleted(&self) -> bool { 118 | self.entry.is_deleted() 119 | } 120 | } 121 | 122 | impl From> for Node 123 | where 124 | V: dbs::Diff, 125 | { 126 | fn from(entry: dbs::Entry) -> Node { 127 | Node { 128 | entry: Arc::new(entry), 129 | black: false, 130 | left: None, 131 | right: None, 132 | } 133 | } 134 
| } 135 | 136 | #[cfg(test)] 137 | #[path = "node_test.rs"] 138 | mod node_test; 139 | -------------------------------------------------------------------------------- /src/llrb/node_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[test] 4 | fn test_llrb_node() { 5 | let entry = dbs::Entry::new(10, 200, 1); 6 | let mut node: Node = entry.into(); 7 | assert_eq!(node.footprint().unwrap(), 80); 8 | assert!(node.as_left_ref().is_none()); 9 | assert!(node.as_right_ref().is_none()); 10 | assert!(!node.is_black()); 11 | assert_eq!(*node.as_key(), 10); 12 | assert_eq!(node.to_seqno(), 1); 13 | assert!(!node.is_deleted()); 14 | 15 | node.set_red(); 16 | assert!(!node.is_black()); 17 | node.set_black(); 18 | assert!(node.is_black()); 19 | node.toggle_link(); 20 | assert!(!node.is_black()); 21 | 22 | node.set(300, 2); 23 | assert_eq!(dbs::Entry::new(10, 300, 2), node.entry.as_ref().clone()); 24 | 25 | node.insert(400, 3); 26 | let mut entry = dbs::Entry::new(10, 400, 3); 27 | entry.deltas = vec![crate::dbs::Delta::U { delta: 300, seqno: 2 }]; 28 | assert_eq!(entry, node.entry.as_ref().clone()); 29 | 30 | node.delete(4); 31 | entry = entry.delete(4); 32 | assert_eq!(entry, node.entry.as_ref().clone()); 33 | 34 | node.delete(5); 35 | entry = entry.delete(5); 36 | assert_eq!(entry, node.entry.as_ref().clone()); 37 | 38 | node.insert(500, 6); 39 | entry = entry.insert(500, 6); 40 | assert_eq!(entry, node.entry.as_ref().clone()); 41 | } 42 | -------------------------------------------------------------------------------- /src/llrb/stats.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt, result}; 2 | 3 | #[allow(unused_imports)] 4 | use crate::llrb::Index; 5 | use crate::{dbs, llrb::Depth, util::spinlock}; 6 | 7 | /// Statistic type, for [Index] type. 
8 | pub struct Stats { 9 | pub name: String, 10 | pub spin: bool, 11 | pub node_size: usize, 12 | pub n_count: usize, 13 | pub n_deleted: usize, 14 | pub tree_footprint: isize, 15 | pub spin_stats: spinlock::Stats, 16 | pub blacks: Option, 17 | pub depths: Option, 18 | } 19 | 20 | impl Stats { 21 | pub(crate) fn new(name: &str, spin: bool) -> Stats { 22 | Stats { 23 | name: name.to_string(), 24 | spin, 25 | node_size: Default::default(), 26 | n_count: Default::default(), 27 | n_deleted: Default::default(), 28 | tree_footprint: Default::default(), 29 | spin_stats: Default::default(), 30 | blacks: None, 31 | depths: None, 32 | } 33 | } 34 | } 35 | 36 | impl fmt::Display for Stats { 37 | fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> { 38 | let none = "none".to_string(); 39 | let b = self.blacks.as_ref().map_or(none.clone(), |x| x.to_string()); 40 | let d = self.depths.as_ref().map_or(none, |x| x.to_string()); 41 | writeln!(f, "llrb.name = {}", self.name)?; 42 | writeln!( 43 | f, 44 | "llrb = {{ n_count={}, n_deleted={} node_size={}, blacks={} }}", 45 | self.n_count, self.n_deleted, self.node_size, b, 46 | )?; 47 | writeln!(f, "llrb = {{ tree_footprint={} }}", self.tree_footprint)?; 48 | writeln!(f, "llrb.spin_stats = {}", self.spin_stats)?; 49 | writeln!(f, "llrb.depths = {}", d) 50 | } 51 | } 52 | 53 | impl dbs::ToJson for Stats { 54 | fn to_json(&self) -> String { 55 | let null = "null".to_string(); 56 | // TODO: should we convert this to to_json() ? 
57 | let spin_stats = self.spin_stats.to_string(); 58 | format!( 59 | concat!( 60 | r#"{{ ""llrb": {{ "name": {}, "n_count": {:X}, "#, 61 | r#""n_deleted": {}, "#, 62 | r#""tree_footprint": {}, "#, 63 | r#""node_size": {}, "spin_stats": {}, "#, 64 | r#""blacks": {}, "depths": {} }} }}"#, 65 | ), 66 | self.name, 67 | self.n_count, 68 | self.n_deleted, 69 | self.tree_footprint, 70 | self.node_size, 71 | spin_stats, 72 | self.blacks.as_ref().map_or(null.clone(), |x| format!("{}", x)), 73 | self.depths.as_ref().map_or(null, |x| x.to_json()), 74 | ) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/mq/filter.rs: -------------------------------------------------------------------------------- 1 | use rayon::prelude::*; 2 | 3 | use std::{sync::mpsc, thread}; 4 | 5 | use crate::{mq, Error, Result}; 6 | 7 | pub struct Filter 8 | where 9 | Q: 'static + Sync + Send, 10 | F: 'static + Sync + Send + Fn(&Q) -> bool, 11 | { 12 | name: String, 13 | chan_size: usize, 14 | 15 | input: Option>, 16 | filter: Option, 17 | handle: Option>>, 18 | } 19 | 20 | impl Filter 21 | where 22 | Q: 'static + Sync + Send, 23 | F: 'static + Sync + Send + Fn(&Q) -> bool, 24 | { 25 | pub fn new(name: String, input: mpsc::Receiver, filter: F) -> Self { 26 | Filter { 27 | name, 28 | chan_size: mq::DEFAULT_CHAN_SIZE, 29 | 30 | input: Some(input), 31 | filter: Some(filter), 32 | handle: None, 33 | } 34 | } 35 | 36 | pub fn set_chan_size(&mut self, chan_size: usize) -> &mut Self { 37 | self.chan_size = chan_size; 38 | self 39 | } 40 | 41 | pub fn spawn(&mut self) -> mpsc::Receiver { 42 | let (name, chan_size) = (self.name.clone(), self.chan_size); 43 | let (tx, output) = mpsc::sync_channel(self.chan_size); 44 | 45 | let (input, filter) = (self.input.take().unwrap(), self.filter.take().unwrap()); 46 | 47 | self.handle = 48 | Some(thread::spawn(move || action(name, chan_size, input, tx, filter))); 49 | 50 | output 51 | } 52 | 53 | pub fn close_wait(self) 
-> Result<()> { 54 | match self.handle { 55 | Some(handle) => match handle.join() { 56 | Ok(res) => res, 57 | Err(_) => { 58 | err_at!(ThreadFail, msg: "thread fail Filter<{:?}>", self.name) 59 | } 60 | }, 61 | None => Ok(()), 62 | } 63 | } 64 | } 65 | 66 | fn action( 67 | name: String, 68 | chan_size: usize, 69 | input: mpsc::Receiver, 70 | tx: mpsc::SyncSender, 71 | filter: F, 72 | ) -> Result<()> 73 | where 74 | Q: 'static + Sync + Send, 75 | F: 'static + Sync + Send + Fn(&Q) -> bool, 76 | { 77 | loop { 78 | match mq::get_messages(&input, chan_size) { 79 | Ok(qmsgs) => { 80 | for rmsg in qmsgs.into_par_iter().filter(&filter).collect::>() { 81 | err_at!(IPCFail, tx.send(rmsg), "thread Filter<{:?}", name)? 82 | } 83 | } 84 | Err(mpsc::TryRecvError::Disconnected) => break, 85 | _ => unreachable!(), 86 | } 87 | } 88 | 89 | Ok(()) 90 | } 91 | -------------------------------------------------------------------------------- /src/mq/filter_map.rs: -------------------------------------------------------------------------------- 1 | use rayon::prelude::*; 2 | 3 | use std::{sync::mpsc, thread}; 4 | 5 | use crate::{mq, Error, Result}; 6 | 7 | pub struct FilterMap 8 | where 9 | Q: 'static + Sync + Send, 10 | R: 'static + Sync + Send, 11 | F: 'static + Sync + Send + Fn(Q) -> Option, 12 | { 13 | name: String, 14 | chan_size: usize, 15 | 16 | input: Option>, 17 | filter_map: Option, 18 | handle: Option>>, 19 | } 20 | 21 | impl FilterMap 22 | where 23 | Q: 'static + Sync + Send, 24 | R: 'static + Sync + Send, 25 | F: 'static + Sync + Send + Fn(Q) -> Option, 26 | { 27 | pub fn new(name: String, input: mpsc::Receiver, filter_map: F) -> Self { 28 | FilterMap { 29 | name, 30 | chan_size: mq::DEFAULT_CHAN_SIZE, 31 | 32 | input: Some(input), 33 | filter_map: Some(filter_map), 34 | handle: None, 35 | } 36 | } 37 | 38 | pub fn set_chan_size(&mut self, chan_size: usize) -> &mut Self { 39 | self.chan_size = chan_size; 40 | self 41 | } 42 | 43 | pub fn spawn(&mut self) -> mpsc::Receiver { 
44 | let (name, chan_size) = (self.name.clone(), self.chan_size); 45 | let (tx, output) = mpsc::sync_channel(self.chan_size); 46 | 47 | let input = self.input.take().unwrap(); 48 | let filter_map = self.filter_map.take().unwrap(); 49 | 50 | self.handle = 51 | Some(thread::spawn(move || action(name, chan_size, input, tx, filter_map))); 52 | 53 | output 54 | } 55 | 56 | pub fn close_wait(self) -> Result<()> { 57 | match self.handle { 58 | Some(handle) => match handle.join() { 59 | Ok(res) => res, 60 | Err(_) => { 61 | err_at!(ThreadFail, msg: "thread fail FilterMap<{:?}>", self.name) 62 | } 63 | }, 64 | None => Ok(()), 65 | } 66 | } 67 | } 68 | 69 | fn action( 70 | name: String, 71 | chan_size: usize, 72 | input: mpsc::Receiver, 73 | tx: mpsc::SyncSender, 74 | filter_map: F, 75 | ) -> Result<()> 76 | where 77 | R: 'static + Sync + Send, 78 | Q: 'static + Sync + Send, 79 | F: 'static + Sync + Send + Fn(Q) -> Option, 80 | { 81 | loop { 82 | match mq::get_messages(&input, chan_size) { 83 | Ok(qmsgs) => { 84 | for rmsg in 85 | qmsgs.into_par_iter().filter_map(&filter_map).collect::>() 86 | { 87 | err_at!(IPCFail, tx.send(rmsg), "thread FilterMap<{:?}", name)? 
88 | } 89 | } 90 | Err(mpsc::TryRecvError::Disconnected) => break, 91 | _ => unreachable!(), 92 | } 93 | } 94 | 95 | Ok(()) 96 | } 97 | -------------------------------------------------------------------------------- /src/mq/map.rs: -------------------------------------------------------------------------------- 1 | use rayon::prelude::*; 2 | 3 | use std::{sync::mpsc, thread}; 4 | 5 | use crate::{mq, Error, Result}; 6 | 7 | pub struct Map 8 | where 9 | Q: 'static + Sync + Send, 10 | R: 'static + Sync + Send, 11 | F: 'static + Sync + Send + Fn(Q) -> R, 12 | { 13 | name: String, 14 | chan_size: usize, 15 | 16 | input: Option>, 17 | map: Option, 18 | handle: Option>>, 19 | } 20 | 21 | impl Map 22 | where 23 | Q: 'static + Sync + Send, 24 | R: 'static + Sync + Send, 25 | F: 'static + Sync + Send + Fn(Q) -> R, 26 | { 27 | pub fn new(name: String, input: mpsc::Receiver, map: F) -> Self { 28 | Map { 29 | name, 30 | chan_size: mq::DEFAULT_CHAN_SIZE, 31 | 32 | input: Some(input), 33 | map: Some(map), 34 | handle: None, 35 | } 36 | } 37 | 38 | pub fn set_chan_size(&mut self, chan_size: usize) -> &mut Self { 39 | self.chan_size = chan_size; 40 | self 41 | } 42 | 43 | pub fn spawn(&mut self) -> mpsc::Receiver { 44 | let (name, chan_size) = (self.name.clone(), self.chan_size); 45 | let (tx, output) = mpsc::sync_channel(self.chan_size); 46 | 47 | let (input, map) = (self.input.take().unwrap(), self.map.take().unwrap()); 48 | 49 | self.handle = 50 | Some(thread::spawn(move || action(name, chan_size, input, tx, map))); 51 | 52 | output 53 | } 54 | 55 | pub fn close_wait(self) -> Result<()> { 56 | match self.handle { 57 | Some(handle) => match handle.join() { 58 | Ok(res) => res, 59 | Err(_) => { 60 | err_at!(ThreadFail, msg: "thread fail Map<{:?}>", self.name) 61 | } 62 | }, 63 | None => Ok(()), 64 | } 65 | } 66 | } 67 | 68 | fn action( 69 | name: String, 70 | chan_size: usize, 71 | input: mpsc::Receiver, 72 | tx: mpsc::SyncSender, 73 | map: F, 74 | ) -> Result<()> 75 | where 76 | 
Q: 'static + Sync + Send, 77 | R: 'static + Sync + Send, 78 | F: 'static + Sync + Send + Fn(Q) -> R, 79 | { 80 | loop { 81 | match mq::get_messages(&input, chan_size) { 82 | Ok(qmsgs) => { 83 | for rmsg in qmsgs.into_par_iter().map(&map).collect::>() { 84 | err_at!(IPCFail, tx.send(rmsg), "thread Map<{:?}", name)? 85 | } 86 | } 87 | Err(mpsc::TryRecvError::Disconnected) => break, 88 | _ => unreachable!(), 89 | } 90 | } 91 | 92 | Ok(()) 93 | } 94 | -------------------------------------------------------------------------------- /src/mq/mod.rs: -------------------------------------------------------------------------------- 1 | use std::{result, sync::mpsc}; 2 | 3 | pub mod filter; 4 | pub mod filter_map; 5 | pub mod map; 6 | pub mod reduce; 7 | pub mod sink; 8 | pub mod source; 9 | pub mod split; 10 | 11 | const DEFAULT_CHAN_SIZE: usize = 1024; 12 | 13 | pub trait Message { 14 | fn finish() -> Self; 15 | } 16 | 17 | fn get_messages( 18 | input: &mpsc::Receiver, 19 | chan_size: usize, 20 | ) -> result::Result, mpsc::TryRecvError> 21 | where 22 | Q: 'static + Send, 23 | { 24 | let mut qmsgs = vec![]; 25 | loop { 26 | match input.try_recv() { 27 | Ok(qmsg) if qmsgs.len() < chan_size => qmsgs.push(qmsg), 28 | Ok(qmsg) => { 29 | qmsgs.push(qmsg); 30 | break Ok(qmsgs); 31 | } 32 | Err(mpsc::TryRecvError::Empty) => break Ok(qmsgs), 33 | Err(err @ mpsc::TryRecvError::Disconnected) => break Err(err), 34 | } 35 | } 36 | } 37 | 38 | //fn put_messages( 39 | // rmsgs: &mut Vec>, 40 | // mut rseqno: u64, 41 | // tx: &mpsc::SyncSender, 42 | //) -> result::Result> 43 | //where 44 | // R: 'static + Send, 45 | //{ 46 | // rmsgs.sort_unstable_by_key(|m| m.seqno); 47 | // rmsgs.reverse(); 48 | // 49 | // while let Some(rmsg) = rmsgs.pop() { 50 | // if rmsg.seqno == rseqno { 51 | // rseqno += 1; 52 | // tx.send(rmsg.rmsg)?; 53 | // } else { 54 | // rmsgs.push(rmsg); 55 | // break; 56 | // } 57 | // } 58 | // 59 | // Ok(rseqno) 60 | //} 61 | 
-------------------------------------------------------------------------------- /src/mq/reduce.rs: -------------------------------------------------------------------------------- 1 | use rayon::prelude::*; 2 | 3 | use std::{sync::mpsc, thread}; 4 | 5 | use crate::{mq, Error, Result}; 6 | 7 | pub struct Reduce 8 | where 9 | Q: 'static + Sync + Send, 10 | ID: 'static + Sync + Send + Clone + Fn() -> Q, 11 | F: 'static + Sync + Send + Fn(Q, Q) -> Q, 12 | { 13 | name: String, 14 | chan_size: usize, 15 | 16 | input: Option>, 17 | identity: Option, 18 | reduce: Option, 19 | handle: Option>>, 20 | } 21 | 22 | impl Reduce 23 | where 24 | Q: 'static + Sync + Send, 25 | ID: 'static + Sync + Send + Clone + Fn() -> Q, 26 | F: 'static + Sync + Send + Fn(Q, Q) -> Q, 27 | { 28 | pub fn new(name: String, input: mpsc::Receiver, identity: ID, reduce: F) -> Self { 29 | Reduce { 30 | name, 31 | chan_size: mq::DEFAULT_CHAN_SIZE, 32 | 33 | input: Some(input), 34 | identity: Some(identity), 35 | reduce: Some(reduce), 36 | handle: None, 37 | } 38 | } 39 | 40 | pub fn set_chan_size(&mut self, chan_size: usize) -> &mut Self { 41 | self.chan_size = chan_size; 42 | self 43 | } 44 | 45 | pub fn spawn(&mut self) -> mpsc::Receiver { 46 | let (name, chan_size) = (self.name.clone(), self.chan_size); 47 | let (tx, output) = mpsc::sync_channel(self.chan_size); 48 | 49 | let input = self.input.take().unwrap(); 50 | let identity = self.identity.take().unwrap(); 51 | let reduce = self.reduce.take().unwrap(); 52 | 53 | self.handle = Some(thread::spawn(move || { 54 | action(name, chan_size, input, tx, identity, reduce) 55 | })); 56 | 57 | output 58 | } 59 | 60 | pub fn close_wait(self) -> Result<()> { 61 | match self.handle { 62 | Some(handle) => match handle.join() { 63 | Ok(res) => res, 64 | Err(_) => { 65 | err_at!(ThreadFail, msg: "thread fail Reduce<{:?}>", self.name) 66 | } 67 | }, 68 | None => Ok(()), 69 | } 70 | } 71 | } 72 | 73 | fn action( 74 | name: String, 75 | chan_size: usize, 76 | input: 
mpsc::Receiver, 77 | tx: mpsc::SyncSender, 78 | identity: ID, 79 | reduce: F, 80 | ) -> Result<()> 81 | where 82 | Q: 'static + Sync + Send, 83 | ID: 'static + Sync + Send + Clone + Fn() -> Q, 84 | F: 'static + Sync + Send + Fn(Q, Q) -> Q, 85 | { 86 | let mut qmsg = None; 87 | loop { 88 | match mq::get_messages(&input, chan_size) { 89 | Ok(mut qmsgs) => { 90 | if let Some(qmsg) = qmsg { 91 | qmsgs.insert(0, qmsg) 92 | } 93 | qmsg = Some( 94 | qmsgs.into_par_iter().reduce(identity.clone(), |a, b| reduce(a, b)), 95 | ); 96 | } 97 | Err(mpsc::TryRecvError::Disconnected) => break, 98 | _ => unreachable!(), 99 | } 100 | } 101 | 102 | match qmsg { 103 | Some(rmsg) => err_at!(IPCFail, tx.send(rmsg), "thread Reduce<{:?}", name), 104 | None => Ok(()), 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/mq/sink.rs: -------------------------------------------------------------------------------- 1 | use std::{sync::mpsc, thread}; 2 | 3 | use crate::{mq, Error, Result}; 4 | 5 | pub struct Sink 6 | where 7 | Q: 'static + Send + mq::Message, 8 | F: 'static + Send + Fn(Q) -> Result, 9 | { 10 | name: String, 11 | chan_size: usize, 12 | 13 | input: Option>, 14 | callb: Option, 15 | handle: Option>>, 16 | } 17 | 18 | impl Sink 19 | where 20 | Q: 'static + Send + mq::Message, 21 | F: 'static + Send + Fn(Q) -> Result, 22 | { 23 | pub fn new_null(name: String, input: mpsc::Receiver) -> Self { 24 | Sink { 25 | name, 26 | chan_size: mq::DEFAULT_CHAN_SIZE, 27 | 28 | input: Some(input), 29 | callb: None, 30 | handle: None, 31 | } 32 | } 33 | 34 | pub fn new_callb(name: String, input: mpsc::Receiver, callb: F) -> Self { 35 | Sink { 36 | name, 37 | chan_size: mq::DEFAULT_CHAN_SIZE, 38 | 39 | input: Some(input), 40 | callb: Some(callb), 41 | handle: None, 42 | } 43 | } 44 | 45 | pub fn set_chan_size(&mut self, chan_size: usize) -> &mut Self { 46 | self.chan_size = chan_size; 47 | self 48 | } 49 | 50 | pub fn spawn(&mut self) { 51 | let 
chan_size = self.chan_size; 52 | 53 | let input = self.input.take().unwrap(); 54 | self.handle = match self.callb.take() { 55 | Some(callb) => { 56 | Some(thread::spawn(move || action_callb(chan_size, input, callb))) 57 | } 58 | None => Some(thread::spawn(move || action_null(chan_size, input))), 59 | }; 60 | } 61 | 62 | /// Close this sink. 63 | pub fn close_wait(self) -> Result<()> { 64 | match self.handle { 65 | Some(handle) => match handle.join() { 66 | Ok(res) => res, 67 | Err(_) => { 68 | err_at!(ThreadFail, msg: "thread fail Sink<{:?}>", self.name) 69 | } 70 | }, 71 | None => Ok(()), 72 | } 73 | } 74 | } 75 | 76 | fn action_null(chan_size: usize, input: mpsc::Receiver) -> Result<()> 77 | where 78 | Q: 'static + Send, 79 | { 80 | loop { 81 | match mq::get_messages(&input, chan_size) { 82 | Ok(_qmsgs) => (), 83 | Err(mpsc::TryRecvError::Disconnected) => break, 84 | _ => unreachable!(), 85 | } 86 | } 87 | 88 | Ok(()) 89 | } 90 | 91 | fn action_callb(chan_size: usize, input: mpsc::Receiver, callb: F) -> Result<()> 92 | where 93 | Q: 'static + Send + mq::Message, 94 | F: 'static + Send + Fn(Q) -> Result, 95 | { 96 | let res = 'outer: loop { 97 | match mq::get_messages(&input, chan_size) { 98 | Ok(qmsgs) => { 99 | for qmsg in qmsgs.into_iter() { 100 | match callb(qmsg) { 101 | Ok(true) => (), 102 | Ok(false) => break 'outer Ok(()), 103 | Err(err) => break 'outer Err(err), 104 | } 105 | } 106 | } 107 | Err(mpsc::TryRecvError::Disconnected) => break Ok(()), 108 | _ => unreachable!(), 109 | } 110 | }; 111 | 112 | callb(Q::finish())?; 113 | res 114 | } 115 | -------------------------------------------------------------------------------- /src/mq/source.rs: -------------------------------------------------------------------------------- 1 | use std::{sync::mpsc, thread}; 2 | 3 | use crate::{mq, Error, Result}; 4 | 5 | /// Source type, than can create messages either from iterator or generator function. 
6 | pub struct Source 7 | where 8 | R: 'static + Send, 9 | I: 'static + Send + Iterator, 10 | F: 'static + Send + Fn() -> Result>, 11 | { 12 | name: String, 13 | chan_size: usize, 14 | 15 | inner: Option>, 16 | handle: Option>>, 17 | } 18 | 19 | enum Inner 20 | where 21 | R: 'static + Send, 22 | I: 'static + Send + Iterator, 23 | F: 'static + Send + Fn() -> Result>, 24 | { 25 | Iter { iter: I }, 26 | Gen { gen: F }, 27 | } 28 | 29 | impl Source 30 | where 31 | R: 'static + Send, 32 | I: 'static + Send + Iterator, 33 | F: 'static + Send + Fn() -> Result>, 34 | { 35 | /// Create a new source from iterator. 36 | pub fn from_iter(name: String, iter: I) -> Self { 37 | Source { 38 | name, 39 | chan_size: mq::DEFAULT_CHAN_SIZE, 40 | 41 | inner: Some(Inner::Iter { iter }), 42 | handle: None, 43 | } 44 | } 45 | 46 | /// Create a new source from generator function. 47 | pub fn from_gen(name: String, gen: F) -> Self { 48 | Source { 49 | name, 50 | chan_size: mq::DEFAULT_CHAN_SIZE, 51 | 52 | inner: Some(Inner::Gen { gen }), 53 | handle: None, 54 | } 55 | } 56 | 57 | pub fn set_chan_size(&mut self, chan_size: usize) -> &mut Self { 58 | self.chan_size = chan_size; 59 | self 60 | } 61 | 62 | /// Take the output channel for this source. Shall be called only once after 63 | /// creating the source. 64 | pub fn spawn(&mut self) -> mpsc::Receiver { 65 | let name = self.name.clone(); 66 | 67 | let (handle, output) = match self.inner.take() { 68 | Some(Inner::Iter { iter }) => { 69 | let (tx, output) = mpsc::sync_channel(self.chan_size); 70 | (thread::spawn(move || action_iter(name, tx, iter)), output) 71 | } 72 | Some(Inner::Gen { gen }) => { 73 | let (tx, output) = mpsc::sync_channel(self.chan_size); 74 | (thread::spawn(move || action_gen(name, tx, gen)), output) 75 | } 76 | None => unreachable!(), 77 | }; 78 | 79 | self.handle = Some(handle); 80 | output 81 | } 82 | 83 | /// Close this source. 
84 | pub fn close_wait(self) -> Result<()> { 85 | match self.handle { 86 | Some(handle) => match handle.join() { 87 | Ok(res) => res, 88 | Err(_) => { 89 | err_at!(ThreadFail, msg: "thread fail Source<{:?}>", self.name) 90 | } 91 | }, 92 | None => Ok(()), 93 | } 94 | } 95 | } 96 | 97 | fn action_iter(name: String, tx: mpsc::SyncSender, iter: I) -> Result<()> 98 | where 99 | R: 'static + Send, 100 | I: 'static + Send + Iterator, 101 | { 102 | for msg in iter { 103 | err_at!(IPCFail, tx.send(msg), "thread Source<{:?}>", name)? 104 | } 105 | 106 | // tx shall be dropped here. 107 | Ok(()) 108 | } 109 | 110 | fn action_gen(name: String, tx: mpsc::SyncSender, gen: F) -> Result<()> 111 | where 112 | R: 'static + Send, 113 | F: 'static + Send + Fn() -> Result>, 114 | { 115 | while let Some(msg) = gen()? { 116 | err_at!(IPCFail, tx.send(msg), "thread Source<{:?}>", name)? 117 | } 118 | 119 | // tx shall be dropped here. 120 | Ok(()) 121 | } 122 | -------------------------------------------------------------------------------- /src/mq/split.rs: -------------------------------------------------------------------------------- 1 | use std::{sync::mpsc, thread}; 2 | 3 | use crate::{mq, Error, Result}; 4 | 5 | pub struct Split 6 | where 7 | Q: 'static + Send + Clone, 8 | { 9 | name: String, 10 | chan_size: usize, 11 | n: usize, 12 | 13 | input: Option>, 14 | handle: Option>>, 15 | } 16 | 17 | impl Split 18 | where 19 | Q: 'static + Send + Clone, 20 | { 21 | pub fn new(name: String, input: mpsc::Receiver, n: usize) -> Self { 22 | Split { 23 | name, 24 | chan_size: mq::DEFAULT_CHAN_SIZE, 25 | n, 26 | 27 | input: Some(input), 28 | handle: None, 29 | } 30 | } 31 | 32 | pub fn set_chan_size(&mut self, chan_size: usize) -> &mut Self { 33 | self.chan_size = chan_size; 34 | self 35 | } 36 | 37 | pub fn spawn(&mut self) -> Vec> { 38 | let (name, chan_size) = (self.name.clone(), self.chan_size); 39 | let (mut txs, mut outputs) = (vec![], vec![]); 40 | 41 | (0..self.n).for_each(|_| { 42 | 
let (tx, output) = mpsc::sync_channel(self.chan_size); 43 | txs.push(tx); 44 | outputs.push(output); 45 | }); 46 | 47 | let input = self.input.take().unwrap(); 48 | self.handle = Some(thread::spawn(move || action(name, chan_size, input, txs))); 49 | 50 | outputs 51 | } 52 | 53 | pub fn close_wait(self) -> Result<()> { 54 | match self.handle { 55 | Some(handle) => match handle.join() { 56 | Ok(res) => res, 57 | Err(_) => { 58 | err_at!(ThreadFail, msg: "thread fail Split<{:?}>", self.name) 59 | } 60 | }, 61 | None => Ok(()), 62 | } 63 | } 64 | } 65 | 66 | fn action( 67 | name: String, 68 | chan_size: usize, 69 | input: mpsc::Receiver, 70 | txs: Vec>, 71 | ) -> Result<()> 72 | where 73 | Q: 'static + Send + Clone, 74 | { 75 | loop { 76 | match mq::get_messages(&input, chan_size) { 77 | Ok(qmsgs) => { 78 | for qmsg in qmsgs.into_iter() { 79 | for tx in txs.iter() { 80 | err_at!( 81 | IPCFail, 82 | tx.send(qmsg.clone()), 83 | "thread Split<{:?}>", 84 | name 85 | )?; 86 | } 87 | } 88 | } 89 | Err(mpsc::TryRecvError::Disconnected) => break, 90 | _ => unreachable!(), 91 | } 92 | } 93 | 94 | Ok(()) 95 | } 96 | -------------------------------------------------------------------------------- /src/parsec/lex.rs: -------------------------------------------------------------------------------- 1 | use crate::parsec::{Lexer, Position}; 2 | 3 | /// Lex type implementing a lexer compatible with rdms/parsec. 
4 | #[derive(Clone, Debug)] 5 | pub struct Lex { 6 | text: String, 7 | row_no: usize, // start from ZERO 8 | col_no: usize, // start from ZERO 9 | cursor: usize, // start from ZERO 10 | } 11 | 12 | impl Lex { 13 | pub fn new(text: String) -> Lex { 14 | Lex { text, row_no: 0, col_no: 0, cursor: 0 } 15 | } 16 | } 17 | 18 | impl Lexer for Lex { 19 | fn to_position(&self) -> Position { 20 | Position(self.row_no + 1, self.col_no + 1) 21 | } 22 | 23 | fn to_cursor(&self) -> usize { 24 | self.cursor 25 | } 26 | 27 | fn move_cursor(&mut self, n: usize) { 28 | let r = self.cursor..(self.cursor + n); 29 | for ch in self.text[r].chars() { 30 | match ch { 31 | '\n' => { 32 | self.row_no += 1; 33 | self.col_no = 0; 34 | } 35 | _ => self.col_no += 1, 36 | } 37 | } 38 | 39 | self.cursor += n; 40 | } 41 | 42 | fn as_str(&self) -> &str { 43 | &self.text[self.cursor..] 44 | } 45 | 46 | fn save(&self) -> Lex { 47 | #[cfg(feature = "debug")] 48 | println!(">>> save-lex @{}", self.to_position()); 49 | 50 | Lex { 51 | text: String::default(), 52 | row_no: self.row_no, 53 | col_no: self.col_no, 54 | cursor: self.cursor, 55 | } 56 | } 57 | 58 | fn restore(&mut self, other: Self) { 59 | #[cfg(feature = "debug")] 60 | println!(">>> restore-lex @{}", other.to_position()); 61 | 62 | self.row_no = other.row_no; 63 | self.col_no = other.col_no; 64 | self.cursor = other.cursor; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/robt/entry_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng}; 3 | 4 | #[test] 5 | fn test_robt_entry() { 6 | let seed: u64 = random(); 7 | println!("test_entry {}", seed); 8 | let mut rng = StdRng::seed_from_u64(seed); 9 | let key = 10; 10 | 11 | let dbnt = match rng.gen::() % 2 { 12 | 0 => dbs::Entry::::new(key, rng.gen(), 1), 13 | 1 => dbs::Entry::::new_delete(key, 1), 14 | _ => unreachable!(), 15 
| }; 16 | for seqno in 2..10 { 17 | match rng.gen::() % 2 { 18 | 0 => { 19 | dbnt.insert(rng.gen(), seqno); 20 | } 21 | 1 => { 22 | dbnt.delete(seqno); 23 | } 24 | _ => unreachable!(), 25 | } 26 | } 27 | let zz = Entry::::from(dbnt.clone()); 28 | let mm = Entry::::new_mm(key, 100); 29 | let mz = Entry::::new_mz(key, 200); 30 | 31 | assert_eq!(dbnt, dbs::Entry::try_from(Entry::from(dbnt.clone())).unwrap()); 32 | assert_eq!(zz.as_key(), &key); 33 | assert_eq!(mz.as_key(), &key); 34 | assert_eq!(mm.as_key(), &key); 35 | assert_eq!(zz.borrow_key(), &key); 36 | assert_eq!(mz.borrow_key(), &key); 37 | assert_eq!(mm.borrow_key(), &key); 38 | assert_eq!(zz.to_key(), key); 39 | assert_eq!(mz.to_key(), key); 40 | assert_eq!(mm.to_key(), key); 41 | assert!(zz.is_zblock()); 42 | assert!(!mz.is_zblock()); 43 | assert!(!mm.is_zblock()); 44 | 45 | let res = mm.clone().into_reference(0, true).unwrap(); 46 | assert_eq!(mm, res.0); 47 | assert!(res.1.is_empty()); 48 | let res = mz.clone().into_reference(0, true).unwrap(); 49 | assert_eq!(mz, res.0); 50 | assert!(res.1.is_empty()); 51 | 52 | let (zz_ref, data) = zz.clone().into_reference(0, true).unwrap(); 53 | assert_eq!(zz_ref.to_key(), key); 54 | 55 | let mut data = io::Cursor::new(data); 56 | assert_eq!(zz_ref.into_native(&mut data, true).unwrap(), zz); 57 | } 58 | -------------------------------------------------------------------------------- /src/robt/files.rs: -------------------------------------------------------------------------------- 1 | use std::{convert::TryFrom, ffi, fmt, path, result}; 2 | 3 | use crate::{Error, Result}; 4 | 5 | /// An Index file is uniquely locatable by providing the `dir` and name. 6 | /// where `dir` is the directory in which the index file is located and `name` 7 | /// is the unique name for the index. 
`format!("{}-robt.indx", name)` 8 | #[derive(Clone)] 9 | pub struct IndexFileName(pub ffi::OsString); 10 | 11 | impl From for IndexFileName { 12 | fn from(name: String) -> IndexFileName { 13 | let file_name = format!("{}-robt.indx", name); 14 | IndexFileName(AsRef::::as_ref(&file_name).to_os_string()) 15 | } 16 | } 17 | 18 | impl TryFrom for String { 19 | type Error = Error; 20 | 21 | fn try_from(fname: IndexFileName) -> Result { 22 | let ffpp = path::Path::new(&fname.0); 23 | let fname = || -> Option<&str> { 24 | let fname = ffpp.file_name()?; 25 | if fname.to_str()?.ends_with("-robt.indx") { 26 | Some(path::Path::new(fname).file_stem()?.to_str()?) 27 | } else { 28 | None 29 | } 30 | }(); 31 | 32 | match fname { 33 | Some(fname) => Ok(fname.strip_suffix("-robt").unwrap().to_string()), 34 | None => err_at!(InvalidFile, msg: "{:?}", ffpp), 35 | } 36 | } 37 | } 38 | 39 | impl From for ffi::OsString { 40 | fn from(name: IndexFileName) -> ffi::OsString { 41 | name.0 42 | } 43 | } 44 | 45 | impl fmt::Display for IndexFileName { 46 | fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> { 47 | match self.0.to_str() { 48 | Some(s) => write!(f, "{}", s), 49 | None => write!(f, "{:?}", self.0), 50 | } 51 | } 52 | } 53 | 54 | /// A Value log file is uniquely locatable by providing the `dir` and name. 55 | /// where `dir` is the directory in which the index/vlog file is located and 56 | /// `name` is the unique name for the index/vlog. 
`format!("{}-robt.vlog", name)` 57 | #[derive(Clone)] 58 | pub struct VlogFileName(pub ffi::OsString); 59 | 60 | impl From for VlogFileName { 61 | fn from(name: String) -> VlogFileName { 62 | let file_name = format!("{}-robt.vlog", name); 63 | VlogFileName(AsRef::::as_ref(&file_name).to_os_string()) 64 | } 65 | } 66 | 67 | impl From for ffi::OsString { 68 | fn from(val: VlogFileName) -> ffi::OsString { 69 | val.0 70 | } 71 | } 72 | 73 | impl TryFrom for String { 74 | type Error = Error; 75 | 76 | fn try_from(fname: VlogFileName) -> Result { 77 | let ffpp = path::Path::new(&fname.0); 78 | 79 | let fname = || -> Option<&str> { 80 | let fname = ffpp.file_name()?; 81 | if fname.to_str()?.ends_with("-robt.vlog") { 82 | Some(path::Path::new(fname).file_stem()?.to_str()?) 83 | } else { 84 | None 85 | } 86 | }(); 87 | 88 | match fname { 89 | Some(fname) => Ok(fname.strip_suffix("-robt").unwrap().to_string()), 90 | None => err_at!(InvalidFile, msg: "{:?}", ffpp), 91 | } 92 | } 93 | } 94 | 95 | impl fmt::Display for VlogFileName { 96 | fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> { 97 | match self.0.to_str() { 98 | Some(s) => write!(f, "{}", s), 99 | None => write!(f, "{:?}", self.0), 100 | } 101 | } 102 | } 103 | 104 | #[cfg(test)] 105 | #[path = "files_test.rs"] 106 | mod files_test; 107 | -------------------------------------------------------------------------------- /src/robt/files_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[test] 4 | fn test_robt_index_file() { 5 | let name = "test-index-file".to_string(); 6 | let out = AsRef::::as_ref("test-index-file-robt.indx").to_os_string(); 7 | 8 | let index_file = IndexFileName::from(name.clone()); 9 | assert_eq!(index_file.0, out); 10 | assert_eq!(String::try_from(index_file.clone()).unwrap(), name); 11 | assert_eq!(ffi::OsString::from(index_file), out); 12 | } 13 | 14 | #[test] 15 | fn test_robt_vlog_file() { 16 | let name = 
"test-vlog-file".to_string(); 17 | let out = AsRef::::as_ref("test-vlog-file-robt.vlog").to_os_string(); 18 | 19 | let vlog_file = VlogFileName::from(name); 20 | assert_eq!(vlog_file.0, out); 21 | assert_eq!(ffi::OsString::from(vlog_file), out); 22 | } 23 | -------------------------------------------------------------------------------- /src/robt/flush.rs: -------------------------------------------------------------------------------- 1 | use fs2::FileExt; 2 | 3 | use std::{convert::TryFrom, ffi, fs, mem}; 4 | 5 | use crate::{util, write_file, Error, Result}; 6 | 7 | pub enum Flusher { 8 | File { 9 | loc: ffi::OsString, 10 | fpos: u64, 11 | th: Option, u64, Result>>, 12 | tx: Option, u64>>, 13 | }, 14 | None, 15 | } 16 | 17 | impl Drop for Flusher { 18 | fn drop(&mut self) { 19 | match self { 20 | Flusher::None => (), 21 | Flusher::File { tx, .. } => mem::drop(tx.take()), 22 | } 23 | } 24 | } 25 | 26 | impl Flusher { 27 | // Create a new flusher thread, there are two flushers for `robt` index, one 28 | // for the index-file and the other is for value-file, if enabled. 29 | pub fn new(loc: &ffi::OsStr, create: bool, chan_size: usize) -> Result { 30 | let (fd, fpos) = if create { 31 | (util::files::create_file_a(loc)?, 0) 32 | } else { 33 | let fpos = err_at!(IOError, fs::metadata(loc))?.len().saturating_sub(1); 34 | (util::files::open_file_a(loc)?, fpos) 35 | }; 36 | 37 | let ffpp = loc.to_os_string(); 38 | let th = util::Thread::new_sync( 39 | "flusher", 40 | chan_size, 41 | move |rx: util::thread::Rx, u64>| { 42 | move || thread_flush(ffpp, fd, rx, fpos) 43 | }, 44 | ); 45 | let tx = th.to_tx(); 46 | 47 | let val = Flusher::File { 48 | loc: loc.to_os_string(), 49 | fpos, 50 | th: Some(th), 51 | tx: Some(tx), 52 | }; 53 | 54 | Ok(val) 55 | } 56 | 57 | // create an empty flusher. 58 | pub fn empty() -> Flusher { 59 | Flusher::None 60 | } 61 | 62 | pub fn to_location(&self) -> Option { 63 | match self { 64 | Flusher::File { loc, .. 
} => Some(loc.clone()), 65 | Flusher::None => None, 66 | } 67 | } 68 | 69 | // return the latest file position. 70 | pub fn to_fpos(&self) -> Option { 71 | match self { 72 | Flusher::File { fpos, .. } => Some(*fpos), 73 | Flusher::None => None, 74 | } 75 | } 76 | 77 | // flush data, call to this function only batches data. 78 | pub fn flush(&mut self, data: Vec) -> Result<()> { 79 | match self { 80 | Flusher::File { fpos, tx, .. } => { 81 | *fpos = tx.as_ref().unwrap().request(data)? 82 | } 83 | Flusher::None => (), 84 | }; 85 | Ok(()) 86 | } 87 | 88 | // close this flusher and associated thread, after syncing data to disk. 89 | pub fn close(&mut self) -> Result { 90 | match self { 91 | Flusher::File { tx, th, .. } => { 92 | mem::drop(tx.take()); 93 | th.take().unwrap().join()? 94 | } 95 | Flusher::None => Ok(0), 96 | } 97 | } 98 | } 99 | 100 | fn thread_flush( 101 | loc: ffi::OsString, 102 | mut fd: fs::File, 103 | rx: util::thread::Rx, u64>, 104 | mut fpos: u64, 105 | ) -> Result { 106 | // println!("thread_flush lock_shared <"); 107 | err_at!(IOError, fd.lock_shared(), "fail read lock for {:?}", loc)?; 108 | 109 | for (data, res_tx) in rx { 110 | // println!("flush {:?} fpos:{} len:{}", loc, fpos, data.len()); 111 | write_file!(fd, &data, &loc, "flushing file")?; 112 | 113 | fpos += u64::try_from(data.len()).unwrap(); 114 | res_tx.map(|tx| tx.send(fpos).ok()); 115 | } 116 | 117 | err_at!(IOError, fd.sync_all(), "fail sync_all {:?}", loc)?; 118 | err_at!(IOError, fd.unlock(), "fail read unlock {:?}", loc)?; 119 | // println!("thread_flush unlock >"); 120 | 121 | Ok(fpos) 122 | } 123 | 124 | #[cfg(test)] 125 | #[path = "flush_test.rs"] 126 | mod flush_test; 127 | -------------------------------------------------------------------------------- /src/robt/flush_test.rs: -------------------------------------------------------------------------------- 1 | use arbitrary::{self, Unstructured}; 2 | use rand::{prelude::random, rngs::StdRng, seq::SliceRandom, Rng, 
SeedableRng}; 3 | 4 | use std::{cmp, fs, io::Read}; 5 | 6 | use super::*; 7 | 8 | #[test] 9 | fn test_robt_flush() { 10 | let seed: u64 = random(); 11 | println!("test_flush {}", seed); 12 | let mut rng = StdRng::seed_from_u64(seed); 13 | 14 | let dir = std::env::temp_dir().join("test_flush"); 15 | fs::create_dir_all(&dir).unwrap(); 16 | let file = dir.join("test-flusher.data"); 17 | println!("flush to file {:?}", file); 18 | fs::remove_file(&file).ok(); 19 | 20 | let mut flusher = { 21 | let bytes = rng.gen::<[u8; 32]>(); 22 | let mut uns = Unstructured::new(&bytes); 23 | 24 | let create = true; 25 | let chan_size: usize = cmp::min(uns.arbitrary().unwrap(), 12); 26 | Flusher::new(file.as_ref(), create, chan_size).unwrap() 27 | }; 28 | 29 | let mut fpos = 0; 30 | let mut filedata: Vec = vec![]; 31 | for _i in 0..1000 { 32 | let mut data: Vec = vec![0; 4096]; 33 | data[..256].copy_from_slice(&(0..=255).collect::>()); 34 | data.shuffle(&mut rng); 35 | filedata.extend(&data); 36 | flusher.flush(data).unwrap(); 37 | fpos += 4096; 38 | assert_eq!(fpos, flusher.to_fpos().unwrap()); 39 | } 40 | assert_eq!(flusher.close().unwrap(), 4096000); 41 | let mut flushed_data = vec![]; 42 | let n = fs::OpenOptions::new() 43 | .read(true) 44 | .open(&file) 45 | .unwrap() 46 | .read_to_end(&mut flushed_data) 47 | .unwrap(); 48 | assert_eq!(n, 4096000); 49 | 50 | assert_eq!(flushed_data, filedata); 51 | } 52 | -------------------------------------------------------------------------------- /src/robt/lsm.rs: -------------------------------------------------------------------------------- 1 | //! Module `lsm` implement read API across LSM snapshots of 2 | //! single index instance. 
3 | 4 | use cbordata::FromCbor; 5 | 6 | use std::cmp; 7 | 8 | use crate::{ 9 | dbs, 10 | robt::{reader::IterLsm, Entry}, 11 | Result, 12 | }; 13 | 14 | pub struct YIter<'a, K, V, I, E> 15 | where 16 | K: Ord + FromCbor, 17 | V: dbs::Diff + FromCbor, 18 | ::Delta: FromCbor, 19 | I: Iterator>, 20 | E: Into>, 21 | { 22 | snap: I, 23 | iter: IterLsm<'a, K, V>, 24 | s_entry: Option>>, 25 | i_entry: Option>>, 26 | } 27 | 28 | impl<'a, K, V, I, E> YIter<'a, K, V, I, E> 29 | where 30 | K: Ord + FromCbor, 31 | V: dbs::Diff + FromCbor, 32 | ::Delta: FromCbor, 33 | I: Iterator>, 34 | E: Into>, 35 | { 36 | pub fn new(mut snap: I, mut iter: IterLsm<'a, K, V>) -> YIter<'a, K, V, I, E> { 37 | let s_entry = snap.next().map(|re| re.map(|e| e.into())); 38 | let i_entry = iter.next(); 39 | YIter { snap, iter, s_entry, i_entry } 40 | } 41 | } 42 | 43 | impl<'a, K, V, I, E> Iterator for YIter<'a, K, V, I, E> 44 | where 45 | K: Clone + Ord + FromCbor, 46 | V: dbs::Diff + FromCbor, 47 | ::Delta: FromCbor + From, 48 | I: Iterator>, 49 | E: Into>, 50 | { 51 | type Item = Result>; 52 | 53 | fn next(&mut self) -> Option { 54 | match (self.s_entry.take(), self.i_entry.take()) { 55 | (Some(Ok(se)), Some(Ok(ie))) => { 56 | let cmpval = se.as_key().cmp(ie.as_key()); 57 | //println!( 58 | // "yiter se:{} ie:{} {:?}", 59 | // se.to_seqno(), 60 | // ie.to_seqno(), 61 | // cmpval 62 | //); 63 | match cmpval { 64 | cmp::Ordering::Less => { 65 | self.s_entry = self.snap.next().map(|re| re.map(|e| e.into())); 66 | self.i_entry = Some(Ok(ie)); 67 | Some(Ok(se)) 68 | } 69 | cmp::Ordering::Greater => { 70 | self.i_entry = self.iter.next(); 71 | self.s_entry = Some(Ok(se)); 72 | Some(Ok(ie)) 73 | } 74 | cmp::Ordering::Equal => { 75 | self.s_entry = self.snap.next().map(|re| re.map(|e| e.into())); 76 | self.i_entry = self.iter.next(); 77 | let (a, b) = (ie.to_seqno().unwrap(), se.to_seqno().unwrap()); 78 | let (old, new) = if a < b { (ie, se) } else { (se, ie) }; 79 | Some(Ok(old.commit(new))) 80 | } 81 | } 
82 | } 83 | (Some(Ok(se)), None) => { 84 | self.s_entry = self.snap.next().map(|re| re.map(|e| e.into())); 85 | Some(Ok(se)) 86 | } 87 | (None, Some(Ok(ie))) => { 88 | self.i_entry = self.iter.next(); 89 | Some(Ok(ie)) 90 | } 91 | (Some(Ok(_xe)), Some(Err(err))) => Some(Err(err)), 92 | (Some(Err(err)), Some(Ok(_ye))) => Some(Err(err)), 93 | _ => None, 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/robt/marker.rs: -------------------------------------------------------------------------------- 1 | use lazy_static::lazy_static; 2 | 3 | lazy_static! { 4 | pub static ref ROOT_MARKER: Vec = { 5 | let marker = "அறம் செய விரும்பு"; 6 | marker.as_bytes().to_vec() 7 | }; 8 | } 9 | -------------------------------------------------------------------------------- /src/robt/scans_test.rs: -------------------------------------------------------------------------------- 1 | use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng}; 2 | 3 | use super::*; 4 | use crate::{ 5 | dbs::{self, Bloom}, 6 | llrb, 7 | }; 8 | 9 | #[test] 10 | fn test_robt_build_scan() { 11 | use std::time::Duration; 12 | 13 | let seed: u64 = random(); 14 | let mut rng = StdRng::seed_from_u64(seed); 15 | println!("test_build_scan {}", seed); 16 | 17 | let inserts = 1_000_000; 18 | let mdb = llrb::load_index::(seed, 0, inserts, 0, 1_000, None); 19 | 20 | let start_seqno = rng.gen::() % ((mdb.len() as u64) * 2); 21 | let mut iter = BuildScan::new(mdb.iter().unwrap().map(Ok), start_seqno); 22 | let mut count = 0; 23 | for _ in &mut iter { 24 | count += 1; 25 | } 26 | assert_eq!(count, mdb.len() as u64, "{} {}", count, mdb.len()); 27 | 28 | let (build_time, seqno, count, _deleted, epoch, mut iter) = iter.unwrap().unwrap(); 29 | println!("BuildScan build_time {:?}", Duration::from_nanos(build_time)); 30 | println!("BuildScan epoch {:?}", Duration::from_nanos(epoch)); 31 | assert_eq!(seqno, cmp::max(start_seqno, mdb.to_seqno())); 32 | 
assert_eq!(count, mdb.len() as u64, "{} {}", count, mdb.len()); 33 | assert_eq!(iter.next(), None); 34 | } 35 | 36 | #[test] 37 | fn test_robt_nobitmap_scan() { 38 | use crate::bitmaps::NoBitmap; 39 | 40 | let seed: u64 = random(); 41 | let mut rng = StdRng::seed_from_u64(seed); 42 | println!("test_nobitmap_scan {}", seed); 43 | 44 | let inserts = 1_000_000; 45 | let mdb = llrb::load_index::(seed, 0, inserts, 0, 1_000, None); 46 | 47 | // with NoBitmap 48 | let mut iter = 49 | BitmappedScan::new(mdb.iter().unwrap().map(|e| Ok(e.into())), NoBitmap); 50 | let len: usize = iter.by_ref().map(|_| 1).sum(); 51 | let (mut bitmap, mut iter) = iter.unwrap().unwrap(); 52 | bitmap.build().unwrap(); 53 | assert_eq!(len, mdb.len()); 54 | assert_eq!(iter.next(), None); 55 | assert_eq!(bitmap.to_bytes().unwrap().len(), 0); 56 | let bitmap = NoBitmap::from_bytes(&bitmap.to_bytes().unwrap()).unwrap().0; 57 | for _i in 0..1_000_000 { 58 | let key = rng.gen::(); 59 | assert!(bitmap.contains(&key), "{}", key); 60 | } 61 | } 62 | 63 | #[test] 64 | fn test_robt_xorfilter_scan() { 65 | use xorfilter::Xor8; 66 | 67 | let seed: u64 = random(); 68 | let mut rng = StdRng::seed_from_u64(seed); 69 | println!("test_xorfilter_scan {}", seed); 70 | 71 | let inserts = 1_000_000; 72 | let mdb = llrb::load_index::(seed, 0, inserts, 0, 1_000, None); 73 | 74 | // with xorfilter 75 | let mut iter = 76 | BitmappedScan::new(mdb.iter().unwrap().map(|e| Ok(e.into())), Xor8::new()); 77 | let len: usize = iter.by_ref().map(|_| 1).sum(); 78 | let (mut bitmap, mut iter) = iter.unwrap().unwrap(); 79 | bitmap.build().unwrap(); 80 | assert_eq!(len, mdb.len()); 81 | assert_eq!(iter.next(), None); 82 | let bitma = { 83 | let bytes = ::to_bytes(&bitmap).unwrap(); 84 | ::from_bytes(&bytes).unwrap().0 85 | }; 86 | let mut found_keys = 0; 87 | for _i in 0..1_000_000 { 88 | let key = rng.gen::(); 89 | if mdb.get(&key).is_ok() { 90 | found_keys += 1; 91 | assert!(bitma.contains(&key), "{}", key); 92 | } 93 | } 94 | 
println!("found keys in xor8 {}", found_keys); 95 | } 96 | -------------------------------------------------------------------------------- /src/robt/vlog_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[test] 4 | fn test_robt_vlog_value() { 5 | let dbval = { 6 | let (value, seqno) = (10, 1); 7 | dbs::Value::U { value, seqno } 8 | }; 9 | 10 | assert_eq!(dbval, dbs::Value::try_from(Value::from(dbval.clone())).unwrap()); 11 | 12 | let value = Value::from(dbval.clone()); 13 | let (value, data) = value.into_reference(1023).unwrap(); 14 | let mut buf = vec![0; 1023]; 15 | buf.extend(&data); 16 | assert_eq!(value, Value::R { fpos: 1023, length: data.len() as u64 }); 17 | 18 | let mut buf = io::Cursor::new(buf); 19 | assert_eq!(value.into_native(&mut buf).unwrap(), Value::from(dbval)); 20 | } 21 | 22 | #[test] 23 | fn test_robt_vlog_delta() { 24 | let dbdelta = { 25 | let (delta, seqno) = (10, 1); 26 | dbs::Delta::U { delta, seqno } 27 | }; 28 | 29 | assert_eq!(dbdelta, dbs::Delta::try_from(Delta::from(dbdelta.clone())).unwrap()); 30 | 31 | let delta = Delta::from(dbdelta.clone()); 32 | let (delta, data) = delta.into_reference(1023).unwrap(); 33 | let mut buf = vec![0; 1023]; 34 | buf.extend(&data); 35 | assert_eq!(delta, Delta::R { fpos: 1023, length: data.len() as u64 }); 36 | 37 | let mut buf = io::Cursor::new(buf); 38 | assert_eq!(delta.into_native(&mut buf).unwrap(), Delta::from(dbdelta)); 39 | } 40 | -------------------------------------------------------------------------------- /src/sys/mod.rs: -------------------------------------------------------------------------------- 1 | mod system; 2 | 3 | pub use system::{Disk, LoadAvg, MemInfo, Network, Process, System}; 4 | -------------------------------------------------------------------------------- /src/sys/system.rs: -------------------------------------------------------------------------------- 1 | use crate::{Error, Result}; 2 | 3 | pub struct 
System { 4 | pub uname: Uname, 5 | pub boot_time: chrono::NaiveDateTime, 6 | pub num_cpu: usize, 7 | pub cpu_speed: usize, // in MHz 8 | pub disks: Vec, 9 | pub process: Vec, 10 | pub networks: Vec, 11 | } 12 | 13 | #[derive(Clone, Debug)] 14 | pub struct Uname { 15 | pub host_name: String, 16 | pub os_type: String, 17 | pub os_release: String, 18 | } 19 | 20 | impl Uname { 21 | pub fn new() -> Result { 22 | let host_name = err_at!(IOError, sys_info::hostname())?; 23 | let os_type = err_at!(IOError, sys_info::os_type())?; 24 | let os_release = err_at!(IOError, sys_info::os_release())?; 25 | 26 | let val = Uname { host_name, os_type, os_release }; 27 | 28 | Ok(val) 29 | } 30 | } 31 | 32 | pub struct Disk { 33 | pub name: String, 34 | pub total: usize, 35 | pub free: usize, 36 | } 37 | 38 | pub struct LoadAvg { 39 | pub one: f64, 40 | pub five: f64, 41 | pub fifteen: f64, 42 | } 43 | 44 | pub struct MemInfo { 45 | pub total: usize, 46 | pub free: usize, 47 | pub avail: usize, 48 | pub buffers: usize, 49 | pub cached: usize, 50 | pub swap_total: usize, 51 | pub swap_free: usize, 52 | } 53 | 54 | pub struct Network { 55 | pub node_name: String, 56 | } 57 | 58 | pub struct Process; 59 | 60 | #[cfg(test)] 61 | #[path = "system_test.rs"] 62 | mod system_test; 63 | -------------------------------------------------------------------------------- /src/sys/system_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | #[test] 4 | fn test_uname() { 5 | println!("{:?}", Uname::new().unwrap()); 6 | } 7 | -------------------------------------------------------------------------------- /src/trie/mod.rs: -------------------------------------------------------------------------------- 1 | mod trie; 2 | 3 | pub use trie::Trie; 4 | 5 | #[derive(Copy, Clone)] 6 | pub enum WalkRes { 7 | Ok, 8 | SkipDepth, 9 | SkipBreath, 10 | SkipBoth, 11 | } 12 | -------------------------------------------------------------------------------- 
/src/util/cmdline.rs: -------------------------------------------------------------------------------- 1 | use std::ffi; 2 | 3 | /// Parse `args` (if args is None, [std::env::args_os] is used) and return list 4 | /// of args before `sub-command` list of args after `sub-command`. 5 | /// 6 | /// Return type: `(cmd-args, sub-cmd, subcmd-args)` 7 | pub fn parse_os_args( 8 | args: Option>, 9 | ) -> (Vec, ffi::OsString, Vec) { 10 | let args_os: Vec = { 11 | args 12 | // while taking from std::env skip the first item, it is command-line 13 | .unwrap_or_else(|| std::env::args_os().skip(1).collect()) 14 | .into_iter() 15 | // .map(|s| s.to_str().unwrap().to_string()) 16 | .collect() 17 | }; 18 | 19 | let mut iter = args_os.clone().into_iter().enumerate(); 20 | 21 | let is_cmd_option = |arg: &ffi::OsString| -> bool { 22 | matches!(arg.to_str(), Some(arg) if arg.starts_with('-')) 23 | }; 24 | 25 | loop { 26 | match iter.next() { 27 | None => break (args_os, ffi::OsString::new(), vec![]), 28 | Some((i, arg)) if !is_cmd_option(&arg) && i < (args_os.len() - 1) => { 29 | break (args_os[..i].to_vec(), args_os[i].clone(), args_os[i..].to_vec()) 30 | } 31 | Some((i, arg)) if !is_cmd_option(&arg) => { 32 | break (args_os[..i].to_vec(), args_os[i].clone(), vec![]) 33 | } 34 | _ => (), 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/util/files_test.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fs, 3 | io::{self, Read, Seek, Write}, 4 | path::PathBuf, 5 | }; 6 | 7 | use crate::error::Error; 8 | 9 | use super::*; 10 | 11 | #[test] 12 | fn test_open_file_rw() { 13 | // case 1: try to create empty file. 14 | let dir = PathBuf::new(); 15 | let fd = create_file_a(dir.as_os_str()); 16 | match fd.expect_err("expected invalid-file") { 17 | Error::InvalidFile(_, _) => (), 18 | err => panic!("{:?}", err), 19 | } 20 | 21 | // case 2: try to create root dir as file. 
22 | let mut dir = PathBuf::new(); 23 | dir.push("/"); 24 | let fd = create_file_a(dir.as_os_str()); 25 | match fd.expect_err("expected invalid-file") { 26 | Error::InvalidFile(_, _) => (), 27 | err => panic!("{:?}", err), 28 | } 29 | 30 | // case 3: with valid file, reuse: false 31 | let mut dir = std::env::temp_dir(); 32 | dir.push("rust.rdms.util.open_file_rw.txt"); 33 | let file = dir.as_path(); 34 | 35 | fs::remove_file(file).ok(); 36 | 37 | let mut fd = create_file_a(file.as_os_str()).expect("open-write"); 38 | assert_eq!(fd.write("hello world".as_bytes()).expect("write failed"), 11); 39 | fd.seek(io::SeekFrom::Start(1)).expect("seek failed"); 40 | assert_eq!(fd.write("i world".as_bytes()).expect("write failed"), 7); 41 | 42 | let txt = fs::read(file).expect("read failed"); 43 | assert_eq!(std::str::from_utf8(&txt).unwrap(), "hello worldi world"); 44 | 45 | // case 4: with valid file, reuse: false, recreate 46 | let mut dir = std::env::temp_dir(); 47 | dir.push("rust.rdms.util.open_file_rw.txt"); 48 | let file = dir.as_path(); 49 | 50 | let mut fd = create_file_a(file.as_os_str()).expect("open-write"); 51 | assert_eq!(fd.write("hello world".as_bytes()).expect("write failed"), 11); 52 | fd.seek(io::SeekFrom::Start(1)).expect("seek failed"); 53 | assert_eq!(fd.write("i world".as_bytes()).expect("write failed"), 7); 54 | 55 | let txt = fs::read(file).expect("read failed"); 56 | assert_eq!(std::str::from_utf8(&txt).unwrap(), "hello worldi world"); 57 | 58 | // case 5: with valid file, reuse: true, reuse file. 
59 | let mut dir = std::env::temp_dir(); 60 | dir.push("rust.rdms.util.open_file_rw.txt"); 61 | let file = dir.as_path(); 62 | 63 | let mut fd = open_file_a(file.as_os_str()).expect("open-write"); 64 | assert_eq!(fd.write("hello world".as_bytes()).expect("write failed"), 11); 65 | fd.seek(io::SeekFrom::Start(1)).expect("seek failed"); 66 | assert_eq!(fd.write("i world".as_bytes()).expect("write failed"), 7); 67 | 68 | let txt = fs::read(&file).expect("read failed"); 69 | assert_eq!( 70 | std::str::from_utf8(&txt).unwrap(), 71 | "hello worldi worldhello worldi world" 72 | ); 73 | 74 | // case 6: read file. 75 | let mut fd = open_file_r(file.as_ref()).expect("open-read"); 76 | let mut txt = [0_u8; 36]; 77 | assert_eq!(fd.read(&mut txt).expect("read failed"), txt.len()); 78 | assert_eq!( 79 | std::str::from_utf8(&txt).unwrap(), 80 | "hello worldi worldhello worldi world" 81 | ); 82 | 83 | fd.seek(io::SeekFrom::Start(1)).expect("seek failed"); 84 | assert_eq!(fd.read(&mut txt[0..35]).expect("read failed"), 35); 85 | assert_eq!( 86 | std::str::from_utf8(&txt).unwrap(), 87 | "ello worldi worldhello worldi worldd" 88 | ); 89 | 90 | fd.write("hello world".as_bytes()).expect_err("expected write error"); 91 | } 92 | -------------------------------------------------------------------------------- /src/util/mod_test.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | // TODO: review mod.rs and mod_test.rs after full refactoring. 
4 | 5 | #[test] 6 | fn test_as_sharded_array() { 7 | for i in 0..100 { 8 | let array: Vec = (0..i).collect(); 9 | for n_shards in 0..100 { 10 | let acc = as_sharded_array(&array, n_shards); 11 | assert_eq!(acc.len(), n_shards); 12 | assert!(acc.len() <= n_shards, "{} {}", acc.len(), n_shards); 13 | if n_shards > 0 { 14 | let res: Vec = { 15 | let iter = acc.iter().flat_map(|shard| shard.to_vec()); 16 | iter.collect() 17 | }; 18 | assert_eq!(array, res); 19 | } 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/util/print.rs: -------------------------------------------------------------------------------- 1 | pub trait PrettyRow { 2 | fn to_format() -> prettytable::format::TableFormat; 3 | 4 | fn to_head() -> prettytable::Row; 5 | 6 | fn to_row(&self) -> prettytable::Row; 7 | } 8 | 9 | pub fn make_table(rows: &mut [R]) -> prettytable::Table 10 | where 11 | R: PrettyRow, 12 | { 13 | let mut table = prettytable::Table::new(); 14 | 15 | match rows.len() { 16 | 0 => table, 17 | _ => { 18 | table.set_titles(R::to_head()); 19 | rows.iter_mut().for_each(|r| { 20 | table.add_row(r.to_row()); 21 | }); 22 | table.set_format(R::to_format()); 23 | table 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/web/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod html; 2 | pub mod selector; 3 | -------------------------------------------------------------------------------- /src/wral/entry.rs: -------------------------------------------------------------------------------- 1 | use arbitrary::Arbitrary; 2 | use cbordata::Cborize; 3 | 4 | use std::{ 5 | cmp, 6 | fmt::{self, Display}, 7 | result, 8 | }; 9 | 10 | #[allow(unused_imports)] 11 | use crate::wral::Wal; 12 | 13 | /// Single Op-entry in Write-ahead-log. 14 | /// 15 | /// The actual operation is serialized and opaque to [Wal] instance. 
Applications 16 | /// can iterate over the [Wal] instance for each entry, that is, an Entry value 17 | /// is typically read-only for applications. 18 | #[derive(Debug, Clone, Eq, Default, Cborize, Arbitrary)] 19 | pub struct Entry { 20 | pub seqno: u64, // Seqno for this entry, Monotonically increasing number. 21 | pub op: Vec, // Write operation, in serialized format, opaque to logging. 22 | } 23 | 24 | impl PartialEq for Entry { 25 | fn eq(&self, other: &Self) -> bool { 26 | self.seqno.eq(&other.seqno) 27 | } 28 | } 29 | 30 | impl Display for Entry { 31 | fn fmt(&self, f: &mut fmt::Formatter) -> result::Result<(), fmt::Error> { 32 | write!(f, "entry", self.seqno) 33 | } 34 | } 35 | 36 | impl PartialOrd for Entry { 37 | fn partial_cmp(&self, other: &Self) -> Option { 38 | Some(self.cmp(other)) 39 | } 40 | } 41 | 42 | impl Ord for Entry { 43 | fn cmp(&self, other: &Self) -> cmp::Ordering { 44 | self.seqno.cmp(&other.seqno) 45 | } 46 | } 47 | 48 | impl Entry { 49 | const ID: u32 = 0x0; 50 | 51 | #[inline] 52 | pub fn new(seqno: u64, op: Vec) -> Entry { 53 | Entry { seqno, op } 54 | } 55 | 56 | /// Return the entry's seqno. 57 | #[inline] 58 | pub fn to_seqno(&self) -> u64 { 59 | self.seqno 60 | } 61 | 62 | /// Unwrap entry's seqno and serialized operation. 
63 | #[inline] 64 | pub fn unwrap(self) -> (u64, Vec) { 65 | (self.seqno, self.op) 66 | } 67 | } 68 | 69 | #[cfg(test)] 70 | #[path = "entry_test.rs"] 71 | mod entry_test; 72 | -------------------------------------------------------------------------------- /src/wral/entry_test.rs: -------------------------------------------------------------------------------- 1 | use arbitrary::Unstructured; 2 | use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng}; 3 | 4 | use super::*; 5 | 6 | #[test] 7 | fn test_wral_entry() { 8 | use cbordata::{Cbor, FromCbor, IntoCbor}; 9 | 10 | let seed: u64 = random(); 11 | let mut rng = StdRng::seed_from_u64(seed); 12 | println!("test_wral_entry {}", seed); 13 | 14 | let mut entries: Vec = (0..1000) 15 | .map(|_i| { 16 | let bytes = rng.gen::<[u8; 32]>(); 17 | let mut uns = Unstructured::new(&bytes); 18 | uns.arbitrary::().unwrap() 19 | }) 20 | .collect(); 21 | entries.sort(); 22 | entries.dedup_by(|a, b| a.seqno == b.seqno); 23 | 24 | for entry in entries.iter() { 25 | let entry = entry.clone(); 26 | assert_eq!(entry.to_seqno(), entry.seqno); 27 | let (seqno, op) = entry.clone().unwrap(); 28 | assert_eq!(entry, Entry::new(seqno, op)); 29 | 30 | let cbor: Cbor = entry.clone().into_cbor().unwrap(); 31 | let mut buf: Vec = vec![]; 32 | let n = cbor.encode(&mut buf).unwrap(); 33 | let (val, m) = Cbor::decode(&mut buf.as_slice()).unwrap(); 34 | assert_eq!(n, m); 35 | assert_eq!(cbor, val); 36 | 37 | let entr = Entry::from_cbor(val).unwrap(); 38 | assert_eq!(entr, entry); 39 | } 40 | 41 | let mut seqno = 0; 42 | for entry in entries.into_iter() { 43 | assert!(seqno < entry.seqno, "{} {}", seqno, entry.seqno); 44 | seqno = entry.seqno 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/wral/files.rs: -------------------------------------------------------------------------------- 1 | use std::{ffi, path}; 2 | 3 | use crate::Error; 4 | 5 | pub fn make_filename(name: &str, num: usize) -> 
ffi::OsString {
    let file = format!("{}-journal-{:03}.dat", name, num);
    let file: &ffi::OsStr = file.as_ref();
    file.to_os_string()
}

/// Parse a journal file name back into its `(name, num)` parts; returns
/// `None` for files that do not match the `<name>-journal-<num>.dat` shape.
pub fn unwrap_filename(file: &ffi::OsStr) -> Option<(String, usize)> {
    let stem = {
        let fname = path::Path::new(path::Path::new(&file).file_name()?);
        match fname.extension()?.to_str()? {
            "dat" => Some(fname.file_stem()?.to_str()?.to_string()),
            _ => None,
        }?
    };

    let mut parts: Vec<&str> = stem.split('-').collect();

    // `name` itself may contain '-'; everything before the trailing
    // ["journal", num] pair belongs to the name.
    let (name, parts) = match parts.len() {
        3 => Some((parts.remove(0).to_string(), parts)),
        n if n > 3 => {
            let name: Vec<&str> = parts.drain(..n - 2).collect();
            Some((name.join("-"), parts))
        }
        _ => None,
    }?;

    match parts[..] {
        ["journal", num] => {
            let num: usize = err_at!(FailConvert, num.parse()).ok()?;
            Some((name, num))
        }
        _ => None,
    }
}
--------------------------------------------------------------------------------
/src/wral/journal_test.rs:
--------------------------------------------------------------------------------
use arbitrary::Unstructured;
use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng};

use super::*;

#[test]
fn test_wral_journal() {
    use std::env;

    let seed: u64 = random();
    let mut rng = StdRng::seed_from_u64(seed);
    println!("test_wral_journal {}", seed);

    let name = "test_wral_journal";
    let dir = env::temp_dir().into_os_string();
    println!("test_wral_journal {:?}", dir);
    let mut jn = Journal::start(&dir, name, 0, state::NoState).unwrap();
    assert_eq!(jn.to_journal_number(), 0);
    assert_eq!(jn.len_batches(), 0);
    assert_eq!(jn.as_state().clone(), state::NoState);

    // NOTE(review): element type reconstructed as `Entry`; the generic
    // sections were stripped in extraction -- confirm against journal.rs.
    let mut entries: Vec<Entry> = (0..1_000_000)
        .map(|_i| {
            let bytes = rng.gen::<[u8; 32]>();
            let mut uns = Unstructured::new(&bytes);
            uns.arbitrary::<Entry>().unwrap()
        })
        .collect();
    entries.sort();
    entries.dedup_by(|a, b| a.to_seqno() == b.to_seqno());

    let mut n_batches = 0;
    let mut offset = 0;
    for _i in 0..1000 {
        // NOTE(review): `u8` keeps 1000 batches below the 1M pre-generated
        // entries; the original type argument was stripped -- confirm.
        let n = rng.gen::<u8>();
        for _j in 0..n {
            let entry = entries[offset].clone();
            jn.add_entry(entry.clone()).unwrap();
            entries.push(entry);
            offset += 1;
        }

        // NOTE(review): `offset - 1` underflows if the very first draw is
        // n == 0; pre-existing flakiness, preserved as-is.
        assert_eq!(jn.to_last_seqno(), Some(entries[offset - 1].to_seqno()));

        jn.flush().unwrap();
        if n > 0 {
            n_batches += 1;
        }

        // Flush must not disturb the last-seqno bookkeeping.
        assert_eq!(jn.to_last_seqno(), Some(entries[offset - 1].to_seqno()));
    }
    assert_eq!(n_batches, jn.len_batches());

    let iter = IterJournal::from_journal(&jn, 0..=u64::MAX).unwrap();
    let jn_entries: Vec<Entry> = iter.map(|x| x.unwrap()).collect();
    let entries = entries[..offset].to_vec();
    assert_eq!(entries.len(), jn_entries.len());
    assert_eq!(entries, jn_entries);

    {
        // Reload the journal from disk and verify the same entries come back.
        let (load_jn, _) =
            Journal::<state::NoState>::load(name, &jn.to_location()).unwrap();
        let iter = IterJournal::from_journal(&load_jn, 0..=u64::MAX).unwrap();
        let jn_entries: Vec<Entry> = iter.map(|x| x.unwrap()).collect();
        let entries = entries[..offset].to_vec();
        assert_eq!(entries.len(), jn_entries.len());
        assert_eq!(entries, jn_entries);
    }

    jn.purge().unwrap();
}
--------------------------------------------------------------------------------
/src/wral/mod.rs:
--------------------------------------------------------------------------------
//! Module implement Write-Ahead-Logging.
//!
//! Write-Ahead-Logging is implemented by [Wal] type. To get started create
//! first a configuration [Config] value. Subsequently, a fresh Wal instance
//! can be created or existing Wal from disk can be loaded, using the
//! configuration. Wal optionally takes a type parameter `S` for state, that
//! can be used by application to persist storage state along with each batch.
//!
By default, `NoState` is used. 9 | //! 10 | //! Concurrent writers 11 | //! ------------------ 12 | //! 13 | //! [Wal] writes are batch-processed, where batching is automatically dictated 14 | //! by storage (disk, ssd) latency. Latency can get higher when `fsync` is 15 | //! enabled for every batch flush. With fsync enabled it is hard to reduce 16 | //! the latency, and to get better throughput applications can do concurrent 17 | //! writes. This is possible because [Wal] type can be cloned with underlying 18 | //! structure safely shared among all the clones. For example, 19 | //! 20 | //! ```ignore 21 | //! let wal = wral::Wal::create(config, wral::NoState).unwrap(); 22 | //! let mut writers = vec![]; 23 | //! for id in 0..n_threads { 24 | //! let wal = wal.clone(); 25 | //! writers.push(std::thread::spawn(move || writer(id, wal))); 26 | //! } 27 | //! ``` 28 | //! 29 | //! Application employing concurrent [Wal] must keep in mind that `seqno` 30 | //! generated for consecutive ops may not be monotonically increasing within 31 | //! the same thread, and must make sure to serialize operations across the 32 | //! writers through other means. 33 | //! 34 | //! Concurrent readers 35 | //! ------------------ 36 | //! 37 | //! It is possible for a [Wal] value and its clones to concurrently read the 38 | //! log journal (typically iterating over its entries). Remember that read 39 | //! operations shall block concurrent writes and vice-versa. But concurrent 40 | //! reads shall be allowed. 41 | 42 | use std::ffi; 43 | 44 | mod batch; 45 | mod entry; 46 | mod files; 47 | mod journal; 48 | mod journals; 49 | mod state; 50 | mod wal; 51 | 52 | pub use crate::wral::entry::Entry; 53 | pub use crate::wral::state::{NoState, State}; 54 | pub use crate::wral::wal::Wal; 55 | 56 | /// Default journal file limit is set at 1GB. 57 | pub const JOURNAL_LIMIT: usize = 1024 * 1024 * 1024; 58 | /// Default channel buffer for flush thread, set at 1024. 
59 | pub const SYNC_BUFFER: usize = 1024; 60 | 61 | /// Configuration for [Wal] type. 62 | #[derive(Debug, Clone)] 63 | pub struct Config { 64 | /// Uniquely name Wal instances. 65 | pub name: String, 66 | /// Directory in which wral journals are stored. 67 | pub dir: ffi::OsString, 68 | /// Define file-size limit for a single journal file, beyond which journal files 69 | /// are rotated. 70 | pub journal_limit: usize, 71 | /// Enable fsync for every flush. 72 | pub fsync: bool, 73 | } 74 | 75 | impl<'a> arbitrary::Arbitrary<'a> for Config { 76 | fn arbitrary(u: &mut arbitrary::Unstructured) -> arbitrary::Result { 77 | use std::env; 78 | 79 | let name: String = u.arbitrary()?; 80 | let dir = env::temp_dir().into_os_string(); 81 | 82 | let journal_limit = *u.choose(&[100, 1000, 10_000, 1_000_000])?; 83 | let fsync: bool = u.arbitrary()?; 84 | 85 | let config = Config { name, dir, journal_limit, fsync }; 86 | Ok(config) 87 | } 88 | } 89 | 90 | impl Config { 91 | pub fn new(dir: &ffi::OsStr, name: &str) -> Config { 92 | Config { 93 | name: name.to_string(), 94 | dir: dir.to_os_string(), 95 | journal_limit: JOURNAL_LIMIT, 96 | fsync: true, 97 | } 98 | } 99 | 100 | pub fn set_journal_limit(mut self, journal_limit: usize) -> Self { 101 | self.journal_limit = journal_limit; 102 | self 103 | } 104 | 105 | pub fn set_fsync(mut self, fsync: bool) -> Self { 106 | self.fsync = fsync; 107 | self 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/wral/state.rs: -------------------------------------------------------------------------------- 1 | use cbordata::{Cborize, FromCbor, IntoCbor}; 2 | 3 | #[allow(unused_imports)] 4 | use crate::wral::Wal; 5 | use crate::{wral, Result}; 6 | 7 | /// Callback trait for updating application state in relation to [Wal] type. 
8 | pub trait State: 'static + Clone + Sync + Send + IntoCbor + FromCbor + Default { 9 | fn on_add_entry(&mut self, new_entry: &wral::Entry) -> Result<()>; 10 | } 11 | 12 | /// Default parameter, implementing [State] trait, for [Wal] type. 13 | #[derive(Clone, Default, Eq, PartialEq, Debug, Cborize)] 14 | pub struct NoState; 15 | 16 | impl NoState { 17 | const ID: u32 = 0x0; 18 | } 19 | 20 | impl State for NoState { 21 | fn on_add_entry(&mut self, _: &wral::Entry) -> Result<()> { 22 | Ok(()) 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/wral/wal_test.rs: -------------------------------------------------------------------------------- 1 | use arbitrary::Unstructured; 2 | use rand::{prelude::random, rngs::StdRng, Rng, SeedableRng}; 3 | 4 | use super::*; 5 | 6 | #[test] 7 | fn test_wral_wal() { 8 | use crate::wral::state; 9 | use std::env; 10 | 11 | let seed: u64 = 12 | [4285628235488288451, 4686907263384396610, random()][random::() % 3]; 13 | // let seed: u64 = 4686907263384396610; 14 | 15 | let mut rng = StdRng::seed_from_u64(seed); 16 | println!("test_wral_wal {}", seed); 17 | 18 | let mut config: Config = { 19 | let bytes = rng.gen::<[u8; 32]>(); 20 | let mut uns = Unstructured::new(&bytes); 21 | uns.arbitrary().unwrap() 22 | }; 23 | config.name = "test-wral-wal".to_string(); 24 | config.dir = { 25 | let dir: path::PathBuf = 26 | vec![env::temp_dir(), config.name.clone().into()].into_iter().collect(); 27 | dir.into() 28 | }; 29 | 30 | let n_threads = [1, 2, 4, 8][rng.gen::() % 4]; 31 | let w_ops = [1, 10, 100, 1_000, 10_000][rng.gen::() % 5]; 32 | config.journal_limit = std::cmp::max(1000, (n_threads * w_ops) / 1000); 33 | println!("test_wral_wal config:{:?} n_threads:{} w_ops:{}", config, n_threads, w_ops); 34 | 35 | let wal = Wal::create(config, state::NoState).unwrap(); 36 | 37 | let mut writers = vec![]; 38 | for id in 0..n_threads { 39 | let wal = wal.clone(); 40 | writers.push(std::thread::spawn(move 
|| {
            writer(id, wal, w_ops, seed + (id as u64 * 100))
        }));
    }

    // Collect per-thread entry lists, then flatten into one seqno-ordered list.
    // NOTE(review): element types reconstructed as `wral::Entry` -- generic
    // sections were stripped in extraction; confirm against wal.rs.
    let mut entries: Vec<Vec<wral::Entry>> = vec![];
    for handle in writers {
        entries.push(handle.join().unwrap());
    }
    let mut entries: Vec<wral::Entry> = entries.into_iter().flatten().collect();
    entries.sort_unstable_by_key(|e| e.seqno);

    wal.commit().unwrap();

    // Seqnos must be exactly 1..=n, so their sum is n*(n+1)/2.
    let n = entries.len() as u64;
    let sum = entries.iter().map(|e| e.to_seqno()).sum::<u64>();
    assert_eq!(sum, (n * (n + 1)) / 2);

    let mut readers = vec![];
    for id in 0..n_threads {
        let wal = wal.clone();
        let entries = entries.clone();
        let n_ops = 10;
        readers.push(std::thread::spawn(move || {
            reader(id, wal, n_ops, seed + (id as u64), entries)
        }));
    }

    for handle in readers {
        handle.join().unwrap();
    }

    wal.purge().unwrap();
}

/// Writer thread: append `ops` random operations and return the logged
/// entries (seqno + op) for later verification.
fn writer(_id: usize, wal: Wal, ops: usize, seed: u64) -> Vec<wral::Entry> {
    let mut rng = StdRng::seed_from_u64(seed);

    let mut entries = vec![];
    for _i in 0..ops {
        let op: Vec<u8> = {
            let bytes = rng.gen::<[u8; 32]>();
            let mut uns = Unstructured::new(&bytes);
            uns.arbitrary().unwrap()
        };
        let seqno = wal.add_op(&op).unwrap();
        entries.push(wral::Entry::new(seqno, op));
    }

    wal.close().unwrap();

    entries
}

/// Reader thread: randomly either iterate the full log or a seqno sub-range,
/// comparing against the expected `entries`.
fn reader(_id: usize, wal: Wal, ops: usize, seed: u64, entries: Vec<wral::Entry>) {
    let mut rng = StdRng::seed_from_u64(seed);

    for _i in 0..ops {
        match rng.gen::<usize>() % 2 {
            0 => {
                let items: Vec<wral::Entry> =
                    wal.iter().unwrap().filter_map(|x| x.ok()).collect();
                assert_eq!(items.len(), entries.len());
                assert_eq!(items, entries);
            }
            1 => {
                // start < len and end < len by construction of the moduli.
                let start = rng.gen::<usize>() % entries.len();
                let end = start + (rng.gen::<usize>() % (entries.len() - start));
                let (x, y) = (entries[start].to_seqno(), entries[end].to_seqno());
                let items: Vec<wral::Entry> =
                    wal.range(x..y).unwrap().map(|x|
x.unwrap()).collect();
                assert_eq!(items, entries[start..end]);
            }
            _ => unreachable!(),
        }
    }

    wal.close().unwrap();
}
--------------------------------------------------------------------------------
/src/zimf/mod.rs:
--------------------------------------------------------------------------------
//! Module implement zim web-archive parser.

mod workers;
mod zim;

pub use zim::{Cluster, Compression, Entry, Header, Namespace, Zimf};
--------------------------------------------------------------------------------
/src/zimf/workers.rs:
--------------------------------------------------------------------------------
use std::{convert::TryFrom, ffi, fs, sync::mpsc};

use crate::{util, zimf::Cluster, Error, Result};

/// Request messages posted to the zimf worker pool; each carries its own
/// reply channel.
pub enum Req {
    ClusterHeader {
        off: u64,
        tx: mpsc::Sender<Result<Res>>,
    },
    ClusterBlocks {
        cluster: Cluster,
        tx: mpsc::Sender<Result<Res>>,
    },
}

/// Response messages sent back on the per-request channel.
pub enum Res {
    Cluster { cluster: Cluster },
    Blocks { blobs: Vec<Vec<u8>> },
}

impl TryFrom<Res> for Cluster {
    type Error = Error;

    fn try_from(res: Res) -> Result<Cluster> {
        match res {
            Res::Cluster { cluster } => Ok(cluster),
            // Callers only convert responses to ClusterHeader requests.
            _ => unreachable!(),
        }
    }
}

/// Post a request to read one cluster header; the reply arrives on `tx`.
// NOTE(review): Pool/Rx type parameters reconstructed -- the generic sections
// were stripped in extraction; verify against util::thread definitions.
pub fn read_cluster_header(
    pool: &util::thread::Pool<Req, Res>,
    off: u64, // cluster start fpos
    tx: mpsc::Sender<Result<Res>>,
) -> Result<()> {
    let req = Req::ClusterHeader { off, tx };
    pool.post(req)
}

/// Post a request to decompress one cluster into its blobs.
pub fn read_cluster_blobs(
    pool: &util::thread::Pool<Req, Res>,
    cluster: Cluster, // cluster start fpos
    tx: mpsc::Sender<Result<Res>>,
) -> Result<()> {
    let req = Req::ClusterBlocks { cluster, tx };
    pool.post(req)
}

/// Worker loop: each worker keeps its own read-only handle on the zim file
/// and serves requests until the channel closes.
pub fn worker(zim_loc: ffi::OsString, rx: util::thread::Rx<Req, Res>) -> Result<()> {
    let mut fd = err_at!(IOError, fs::OpenOptions::new().read(true).open(&zim_loc))?;
    for msg in rx {
        // println!("worker id:{} received msg",
_id); 54 | match msg { 55 | (Req::ClusterHeader { off, tx }, None) => { 56 | match Cluster::from_offset(off, &mut fd) { 57 | Ok(cluster) => { 58 | err_at!(IPCFail, tx.send(Ok(Res::Cluster { cluster })))? 59 | } 60 | Err(err) => err_at!(IPCFail, tx.send(Err(err)))?, 61 | } 62 | } 63 | (Req::ClusterBlocks { cluster, tx }, None) => { 64 | // println!("worker id:{} cluster {:?}", _id, cluster); 65 | match cluster.to_blobs(&mut fd) { 66 | Ok(blobs) => err_at!(IPCFail, tx.send(Ok(Res::Blocks { blobs })))?, 67 | Err(err) => err_at!(IPCFail, tx.send(Err(err)))?, 68 | } 69 | } 70 | _ => unreachable!(), 71 | } 72 | } 73 | // println!("worker id:{} closed", _id); 74 | 75 | Ok(()) 76 | } 77 | --------------------------------------------------------------------------------