├── .github └── workflows │ └── rust.yml ├── .gitignore ├── .idea ├── .gitignore ├── badger-rs.iml ├── modules.xml └── vcs.xml ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── my_benchmark.rs ├── build.rs ├── examples └── badger.rs ├── lock.txt └── src ├── backup.rs ├── compaction.rs ├── doc └── write.md ├── event └── mod.rs ├── iterator.rs ├── kv.rs ├── kv_test.rs ├── level_handler.rs ├── levels.rs ├── lib.rs ├── lock.txt ├── log_file.rs ├── manifest.rs ├── options └── mod.rs ├── pb ├── backup.proto ├── backup.rs ├── badgerpb3.proto ├── badgerpb3.rs └── mod.rs ├── skl ├── alloc.rs ├── arena.rs ├── cursor.rs ├── mod.rs ├── node.rs └── skip.rs ├── st_manager.rs ├── table ├── builder.rs ├── iterator.rs ├── mod.rs ├── table.rs └── tests.rs ├── test_data └── vlog_file.text ├── test_util.rs ├── types.rs ├── value_log.rs ├── value_log_tests.rs └── y ├── codec.rs ├── iterator.rs ├── merge_iterator.rs ├── metrics.rs └── mod.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Install latest nightly 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: nightly 23 | override: true 24 | - name: Build 25 | run: cargo build --verbose 26 | - name: Run tests 27 | run: cargo test --verbose 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /.idea/badger-rs.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "badger-rs" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | serde = { version = 
"1.0.171", features = ["derive"] } 10 | serde_json = { version = "1.0.103", default-features = true, features = ["alloc"] } 11 | anyhow = "1.0.72" 12 | thiserror = "1.0.43" 13 | tokio = { version = "1.29.1", features = ["full", "tracing"] } 14 | byteorder = "1.4.3" 15 | rand = "0.8.5" 16 | maligned = "0.2.1" 17 | atomic = "0.5.3" 18 | tabled = { version = "0.12.2", features = ["ansi-str", "color"] } 19 | memmap = "0.7.0" 20 | bytes = "1.4.0" 21 | bloom = "0.3.2" 22 | growable-bloom-filter = { version = "2.0.1", features = ["nightly"] } 23 | filename = "0.1.1" 24 | num_cpus = "1.16.0" 25 | threads_pool = "0.2.6" 26 | crc32fast = "1.3.2" 27 | async-trait = "0.1.71" 28 | fmmap = { version = "0.3.2", features = ["tokio-async"] } 29 | parking_lot = "0.12.1" 30 | bitflags = "2.3.3" 31 | libc = "0.2.147" 32 | log = { version = "0.4.19", features = ["kv_unstable", "kv_unstable_serde", "kv_unstable_sval"] } 33 | async-channel = "1.9.0" 34 | file-guard = "0.1.0" 35 | fs2 = "0.4.3" 36 | awaitgroup = "0.7.0" 37 | range-lock = "0.2.3" 38 | tracing = "0.1.37" 39 | drop_cell = "0.0.0" 40 | walkdir = "2.3.3" 41 | crossbeam-epoch = "0.9.15" 42 | tokio-context = "0.1.3" 43 | dyn-clone = "1.0.12" 44 | eieio = "1.0.0" 45 | either = "1.8.1" 46 | enum-unitary = "0.5.0" 47 | atom_box = "0.1.2" 48 | console-subscriber = "0.1.10" 49 | uuid = { version = "1.4.1", features = ["v5", "v4"] } 50 | winapi = "0.3.9" 51 | itertools = "0.11.0" 52 | tokio-metrics = "0.2.2" 53 | metrics = "0.21.1" 54 | metrics-prometheus = "0.4.1" 55 | prometheus = "0.13.3" 56 | lazy_static = "1.4.0" 57 | getset = "0.1.2" 58 | tokio-stream = "0.1.14" 59 | async-stream = "0.3.5" 60 | futures-core = "0.3.28" 61 | backtrace-on-stack-overflow = "0.3.0" 62 | protobuf = { version = "3.0.0-alpha.2", features = ["with-bytes"] } 63 | [dev-dependencies] 64 | tracing-subscriber = "0.3.17" 65 | tracing-log = "0.1.3" 66 | chrono = "0.4.26" 67 | env_logger = "0.10.0" 68 | console_log = { version = "1.0.0", features = ["color"] } 69 | itertools = "0.11.0" 70 | tokio-metrics = { version = "0.2.2", default-features = false } 71 | tokio = { version = "1.29.1", features = ["full", "rt", "time", "macros", "test-util"] } 72 | criterion = { version = "0.5.1", features = ["tokio"] } 73 | 74 | [build] 75 | rustflags = ["--cfg", "tokio_unstable"] 76 | 77 | [build-dependencies] 78 | protoc-rust = "3.0.0-alpha.2" 79 | 80 | [[bench]] 81 | name = "my_benchmark" 82 | harness = false 83 | 84 | [profile.dev] 85 | debug-assertions = false 86 | 87 | [profile.release] 88 | codegen-units=1 89 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # badger-rs 2 | ![example workflow name](https://github.com/laohanlinux/badger-rs/workflows/Rust/badge.svg) 3 | 4 | Badger is based on [WiscKey paper by University of Wisconsin, Madison](https://www.usenix.org/system/files/conference/fast16/fast16-papers-lu.pdf). 5 | 6 | Thanks for [dgraph-io/badger](https://github.com/dgraph-io/badger) -------------------------------------------------------------------------------- /benches/my_benchmark.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] 2 | fn main() { 3 | use criterion::BenchmarkId; 4 | use criterion::Criterion; 5 | use criterion::{criterion_group, criterion_main}; 6 | 7 | // This is a struct that tells Criterion.rs to use the "futures" crate's current-thread executor 8 | use criterion::async_executor::AsyncExecutor; 9 | 10 | // Here we have an async function to benchmark 11 | async fn do_something(size: usize) { 12 | // Do something async with the size 13 | } 14 | 15 | fn from_elem(c: &mut Criterion) { 16 | let size: usize = 1024; 17 | 18 | c.bench_with_input(BenchmarkId::new("input_example", size), &size, |b, &s| { 19 | // Insert a call to `to_async` to convert the bencher to async mode. 20 | // The timing loops are the same as with the normal bencher. 
21 | // b.to_async(FuturesExecutor).iter(|| do_something(s)); 22 | }); 23 | } 24 | 25 | criterion_group!(benches, from_elem); 26 | criterion_main!(benches); 27 | } 28 | -------------------------------------------------------------------------------- /build.rs: -------------------------------------------------------------------------------- 1 | extern crate protoc_rust; 2 | 3 | fn main() { 4 | // protoc --rust_out=src/pb src/pb/badgerpb3.proto 5 | //protoc_rust::Codegen::new() 6 | // .out_dir("src/pb") 7 | //.inputs(&["src/pb/badgerpb3.proto", "src/pb/backup.proto"]) 8 | // .run() 9 | // .expect("Running protoc failed"); 10 | } 11 | -------------------------------------------------------------------------------- /examples/badger.rs: -------------------------------------------------------------------------------- 1 | #[tokio::main] 2 | async fn main() { 3 | let env = tracing_subscriber::EnvFilter::from_default_env(); 4 | tracing_subscriber::FmtSubscriber::builder() 5 | .with_env_filter(env) 6 | .try_init() 7 | .unwrap(); 8 | let opt = badger_rs::Options::default(); 9 | let kv = badger_rs::KV::open(opt).await.unwrap(); 10 | kv.set( 11 | b"hello word".to_vec(), 12 | b">>>>>I LOVE YOU!<<<<<".to_vec(), 13 | 0x0, 14 | ) 15 | .await 16 | .unwrap(); 17 | 18 | let got = kv.get(b"hello word").await.unwrap(); 19 | println!("{}", String::from_utf8_lossy(&got)); 20 | } 21 | -------------------------------------------------------------------------------- /lock.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laohanlinux/badger-rs/00bbe70da1f4b0fe7d52ffbdf8da91867a147834/lock.txt -------------------------------------------------------------------------------- /src/backup.rs: -------------------------------------------------------------------------------- 1 | use std::io::Write; 2 | use byteorder::{LittleEndian, WriteBytesExt}; 3 | use protobuf::Message; 4 | use crate::pb::backup::KVPair; 5 | 6 | pub fn write_to(entry: &KVPair, wt: &mut W) -> crate::Result<()> where W: Write { 7 | let buf = entry.write_to_bytes().unwrap(); 8 | wt.write_u64::(buf.len() as u64)?; 9 | wt.write_all(&buf)?; 10 | Ok(()) 11 | } 12 | -------------------------------------------------------------------------------- /src/compaction.rs: -------------------------------------------------------------------------------- 1 | use crate::hex_str; 2 | use crate::levels::CompactDef; 3 | use crate::table::table::Table; 4 | 5 | use log::{error, info, warn}; 6 | use parking_lot::lock_api::{RwLockReadGuard, RwLockWriteGuard}; 7 | use parking_lot::{RawRwLock, RwLock}; 8 | use std::fmt::{Display, Formatter}; 9 | use std::sync::atomic::{AtomicU64, Ordering}; 10 | use std::sync::Arc; 11 | 12 | #[derive(Debug)] 13 | pub(crate) struct CompactStatus { 14 | // every level has a *CompactionStatus* that includes multipart *KeyRange* 15 | pub(crate) levels: RwLock>, 16 | } 17 | 18 | impl Default for CompactStatus { 19 | fn default() -> Self { 20 | CompactStatus { 21 | levels: RwLock::new(vec![]), 22 | } 23 | } 24 | } 25 | 26 | impl CompactStatus { 27 | // Check whether we can run this *CompactDef*. That it doesn't overlap with any 28 | // other running Compaction. If it can be run, it would store this run in the compactStatus state. 
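// A minimal usage sketch (assumed call pattern, not taken from this crate's sources):
//     if !compact_status.compare_and_add(&cd) {
//         return Ok(()); // an overlapping compaction already holds these key ranges; retry later
//     }
//     // ... run the compaction described by `cd` ...
//     compact_status.delete(&cd); // always release the reserved KeyRanges afterwards
// `compact_status` and the retry policy are placeholders; only `compare_and_add` and
// `delete` are defined in this file.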
29 | pub(crate) fn compare_and_add(&self, cd: &CompactDef) -> bool { 30 | let level = cd.this_level.level(); 31 | assert!( 32 | level + 1 < self.rl().len(), 33 | "Got level {}, max level {}", 34 | level, 35 | self.rl().len() 36 | ); 37 | let lc = self.rl(); 38 | let this_level = lc.get(level).unwrap(); 39 | let next_level = lc.get(level + 1).unwrap(); 40 | if this_level.overlaps_with(&cd.this_range) { 41 | return false; 42 | } 43 | if next_level.overlaps_with(&cd.next_range) { 44 | return false; 45 | } 46 | 47 | // Check whether this level really needs compaction or not. Otherwise, we'll end up 48 | // running parallel compactions for the same level. 49 | // *NOTE*: We can directly call this_level.total_size, because we already have acquired a read lock 50 | // over this and the next level. 51 | if cd.this_level.get_total_size() - this_level.get_del_size() 52 | < cd.this_level.get_max_total_size() 53 | { 54 | log::info!( 55 | "skip the compaction, top_size:{}, bot_size:{}, max_size:{}", 56 | cd.this_level.get_total_size(), 57 | cd.next_level.get_total_size(), 58 | cd.this_level.get_max_total_size() 59 | ); 60 | return false; 61 | } 62 | this_level.add(cd.this_range.clone()); 63 | next_level.add(cd.next_range.clone()); 64 | this_level.incr_del_size(cd.this_size.load(Ordering::Relaxed)); 65 | true 66 | } 67 | 68 | // Delete CompactDef. 69 | pub(crate) fn delete(&self, cd: &CompactDef) { 70 | let levels = self.wl(); 71 | let level = cd.this_level.level(); 72 | assert!( 73 | level < levels.len() - 1, 74 | "Got level {}, Max levels {}", 75 | level, 76 | levels.len() 77 | ); 78 | 79 | let this_level = levels.get(level).unwrap(); 80 | let next_level = levels.get(level + 1).unwrap(); 81 | // Decr delete size after compacted. 82 | this_level.decr_del_size(cd.this_size.load(Ordering::Relaxed)); 83 | let mut found = this_level.remove(&cd.this_range); 84 | // top level must have KeyRange because it is compact's base condition 85 | assert!(found, "{}", this_level); 86 | found = next_level.remove(&cd.next_range) && found; 87 | if !found { 88 | let this_kr = &cd.this_range; 89 | let next_kr = &cd.next_range; 90 | warn!("Looking for: [{}] in this level.", this_kr,); 91 | warn!("This Level: {}", level); 92 | warn!("Looking for: [{}] in next level.", next_kr); 93 | warn!("Next Level: {}", level + 1); 94 | warn!("KeyRange not found"); 95 | warn!("Looking for seek k range"); 96 | warn!("{}, {}", cd.this_range, cd.next_range); 97 | } 98 | } 99 | 100 | // Return trur if the level overlap with this, otherwise false 101 | pub(crate) fn overlaps_with(&self, level: usize, this: &KeyRange) -> bool { 102 | let cstatus = &self.rl()[level]; 103 | let overlaps = cstatus.overlaps_with(this); 104 | #[cfg(test)] 105 | log::info!( 106 | "level{} compact status compare, {:?}, dst: {:?}, overlaps: {}", 107 | level, 108 | cstatus.rl(), 109 | this, 110 | overlaps 111 | ); 112 | overlaps 113 | } 114 | 115 | // Return level's deleted data count 116 | pub(crate) fn del_size(&self, level: usize) -> u64 { 117 | self.rl()[level].get_del_size() 118 | } 119 | 120 | // Return Level's compaction status with *WriteLockGuard* 121 | pub(crate) fn wl(&self) -> RwLockWriteGuard<'_, RawRwLock, Vec> { 122 | self.levels.write() 123 | } 124 | 125 | // Return Level's compaction status with *ReadLockGuard* 126 | pub(crate) fn rl(&self) -> RwLockReadGuard<'_, RawRwLock, Vec> { 127 | self.levels.read() 128 | } 129 | 130 | pub(crate) fn to_log(&self) { 131 | let status = self.rl(); 132 | info!("Compact levels, count:{}", status.len()); 133 | for level 
in status.iter().enumerate() { 134 | info!("[{}] {}", level.0, level.1.to_string()) 135 | } 136 | } 137 | } 138 | 139 | // Every level compacted status(ranges). 140 | // del_size: all KeyRange size at the level (NOTE: equal LevelCompactStatus.ranges delete size, so after compacting, 141 | // KeyRange and del_size all be decr) 142 | #[derive(Clone, Debug)] 143 | pub(crate) struct LevelCompactStatus { 144 | ranges: Arc>>, 145 | del_size: Arc, 146 | } 147 | 148 | impl Default for LevelCompactStatus { 149 | fn default() -> Self { 150 | LevelCompactStatus { 151 | ranges: Arc::new(RwLock::new(Vec::new())), 152 | del_size: Arc::new(AtomicU64::new(0)), 153 | } 154 | } 155 | } 156 | 157 | impl Display for LevelCompactStatus { 158 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 159 | let ranges = self 160 | .rl() 161 | .iter() 162 | .map(|kr| kr.to_string()) 163 | .collect::>() 164 | .join(","); 165 | let del_size = self.get_del_size(); 166 | f.debug_struct("LevelCompactStatus") 167 | .field("ranges", &ranges) 168 | .field("del_size", &del_size) 169 | .finish() 170 | } 171 | } 172 | 173 | impl LevelCompactStatus { 174 | // returns true if self.ranges and dst has overlap, otherwise returns false 175 | fn overlaps_with(&self, dst: &KeyRange) -> bool { 176 | self.rl().iter().any(|kr| kr.overlaps_with(dst)) 177 | } 178 | 179 | // remove dst from self.ranges 180 | pub(crate) fn remove(&self, dst: &KeyRange) -> bool { 181 | let mut rlock = self.wl(); 182 | let len = rlock.len(); 183 | rlock.retain(|r| r != dst); 184 | len > rlock.len() 185 | } 186 | 187 | // add dst range 188 | fn add(&self, dst: KeyRange) { 189 | self.wl().push(dst); 190 | } 191 | 192 | pub(crate) fn get_del_size(&self) -> u64 { 193 | self.del_size.load(Ordering::Acquire) 194 | } 195 | 196 | fn incr_del_size(&self, n: u64) { 197 | self.del_size.fetch_add(n, Ordering::Release); 198 | } 199 | 200 | fn decr_del_size(&self, n: u64) { 201 | self.del_size.fetch_sub(n, Ordering::Release); 202 | } 203 | 204 | fn wl(&self) -> RwLockWriteGuard<'_, RawRwLock, Vec> { 205 | self.ranges.write() 206 | } 207 | 208 | fn rl(&self) -> RwLockReadGuard<'_, RawRwLock, Vec> { 209 | self.ranges.read() 210 | } 211 | } 212 | 213 | // [left, right], Special inf is range all if it be set `true` 214 | #[derive(Clone, Default, Debug)] 215 | pub(crate) struct KeyRange { 216 | pub(crate) left: Vec, 217 | // TODO zero Copy 218 | pub(crate) right: Vec, 219 | pub(crate) inf: bool, 220 | } 221 | 222 | impl PartialEq for KeyRange { 223 | fn eq(&self, other: &Self) -> bool { 224 | self.equals(other) 225 | } 226 | } 227 | 228 | impl Display for KeyRange { 229 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 230 | write!( 231 | f, 232 | "", 233 | hex_str(&self.left), 234 | hex_str(&self.right), 235 | self.inf 236 | ) 237 | } 238 | } 239 | 240 | // Including all keys 241 | pub(crate) const INFO_RANGE: KeyRange = KeyRange { 242 | left: vec![], 243 | right: vec![], 244 | inf: true, 245 | }; 246 | 247 | impl KeyRange { 248 | // Get the KeyRange of tables 249 | pub(crate) fn get_range(tables: &Vec) -> KeyRange { 250 | assert!(!tables.is_empty()); 251 | let mut smallest = tables[0].smallest(); 252 | let mut biggest = tables[0].biggest(); 253 | for i in 1..tables.len() { 254 | if tables[i].smallest() < smallest { 255 | smallest = tables[i].smallest(); 256 | } 257 | if tables[i].biggest() > biggest { 258 | biggest = tables[i].biggest(); 259 | } 260 | } 261 | KeyRange { 262 | left: smallest.to_vec(), 263 | right: biggest.to_vec(), 264 | inf: false, 265 | } 266 
| } 267 | 268 | // Two ranges are equal only when left, right and inf all match. 269 | pub(crate) fn equals(&self, other: &KeyRange) -> bool { 270 | self.left == other.left && self.right == other.right && self.inf == other.inf 271 | } 272 | 273 | // Check for overlap. *Notice*: if either range is inf, they are considered overlapping. 274 | pub(crate) fn overlaps_with(&self, other: &KeyRange) -> bool { 275 | if self.inf || other.inf { 276 | return true; 277 | } 278 | 279 | // ---[other_left, other_right]--[] 280 | if self.left > other.right { 281 | return false; 282 | } 283 | // ---[]--[other-left, other-right] 284 | if self.right < other.left { 285 | return false; 286 | } 287 | true 288 | } 289 | } 290 | 291 | #[cfg(test)] mod tests { 292 | use crate::compaction::{KeyRange, INFO_RANGE}; 293 | 294 | #[test] 295 | fn key_range() { 296 | let mut v = vec![KeyRange { 297 | left: vec![], 298 | right: vec![], 299 | inf: true, 300 | }]; 301 | let cd = INFO_RANGE; 302 | v.retain(|kr| kr != &cd); 303 | assert!(v.is_empty()); 304 | let tests = vec![vec![2, 20], vec![30, 50], vec![70, 80]]; 305 | 306 | let inputs = vec![ 307 | vec![0, 1], 308 | vec![81, 100], 309 | vec![21, 25], 310 | vec![29, 40], 311 | vec![40, 60], 312 | vec![21, 51], 313 | vec![21, 100], 314 | vec![0, 200], 315 | vec![0, 70], 316 | vec![70, 80], 317 | ]; 318 | 319 | for (i, arg) in inputs.iter().enumerate() { 320 | let left = tests.binary_search_by(|probe| probe[1].cmp(&arg[0])); 321 | let left = left.unwrap_or_else(|n| n); 322 | let right = tests.binary_search_by(|probe| probe[0].cmp(&arg[1])); 323 | let right = right.map(|n| n + 1).unwrap_or_else(|n| n); 324 | println!("{}, {:?}, {:?}", i, left, right); 325 | } 326 | } 327 | } 328 | -------------------------------------------------------------------------------- /src/doc/write.md: -------------------------------------------------------------------------------- 1 | Put Key 2 | 3 | ```mermaid 4 | %% Example of sequence diagram 5 | sequenceDiagram 6 | actor KV 7 | participant WriteCh 8 | actor FlushCh 9 | KV-->>WriteCh: Async Send Req 10 | activate WriteCh 11 | alt Inner Data Transfer 12 | WriteCh-->>WriteCh: 1. Call writeRequests[Mult Reqs] 13 | WriteCh -->>WriteCh: 2. Write Into Vlog, Fill Ptrs 14 | WriteCh -)WriteCh: 3. Check ensureRoomForWrite 15 | WriteCh -->>FlushCh: 4. Send flushTask{s.mt, s.vptr} to FlushCh 16 | Note right of WriteCh: 1) vlog.sync(): Ensure value log is synced to disk so this memtable's contents wouldn't be lost.
2) s.imm = append(s.imm, s.mt): We manage to push this task. Let's modify imm.
3) s.mt = skl.NewSkiplist(arenaSize(&s.opt)): New memtable is empty. We certainly have room. 17 | WriteCh -->>WriteCh: 5. If not pass 3, writeToLSM 18 | WriteCh-->>WriteCh: 6. updateOffset [update lasted Ptr] 19 | end 20 | WriteCh-->> KV: Async Return Req 21 | deactivate WriteCh 22 | activate FlushCh 23 | FlushCh -->> FlushCh: Receive FlushTask From 4 24 | FlushCh -->> FlushCh: ft.mt is nil ? and ft.vptr.IsZero()? Put Offset for replay 25 | FlushCh -->> FlushCh: Create a new table, writeLevel0Table and addLevel0Table 26 | deactivate FlushCh 27 | ``` 28 | 29 | -------------------------------------------------------------------------------- /src/event/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::table::table::Table; 2 | use lazy_static::lazy_static; 3 | use prometheus::{Gauge, IntCounter, IntCounterVec, IntGauge, IntGaugeVec, Opts, Registry}; 4 | use std::fmt; 5 | use std::fmt::Formatter; 6 | use std::time::{Duration, Instant}; 7 | 8 | lazy_static! { 9 | static ref EV: EvMetrics = EvMetrics { 10 | lsm_size: IntGaugeVec::new( 11 | prometheus::Opts::new("badger_lsm_size_bytes", "lsm size bytes by direct"), 12 | &["direct"] 13 | ) 14 | .unwrap(), 15 | vlog_size: IntGauge::new("vlog_size", "vlog size bytes").unwrap(), 16 | pending_writes: IntGauge::new("pending_writes_total", "pending writes total").unwrap(), 17 | num_reads: IntCounter::new("num_reads", "number of reads").unwrap(), 18 | num_writes: IntCounter::new("num_writes", "number of writes").unwrap(), 19 | num_bytes_read: IntCounter::new("num_bytes_read", "bytes of read").unwrap(), 20 | num_bytes_written: IntCounter::new("num_bytes_written", "bytes of written").unwrap(), 21 | num_lsm_gets: IntCounter::new("num_lsm_gets", "number of lsm gets").unwrap(), 22 | num_lsm_bloom_hits: IntCounter::new("num_bloom_hits", "number of bloom hits").unwrap(), 23 | num_blocked_puts: IntCounter::new("num_blocked_hits", "number of blocked hits").unwrap(), 24 | num_mem_tables_gets: IntCounter::new("num_mem_tables", "number of the memtable gets") 25 | .unwrap(), 26 | num_gets: IntCounter::new("num_gets", "number of gets").unwrap(), 27 | num_puts: IntCounter::new("num_puts", "number of puts").unwrap(), 28 | block_hash_calc_cost: IntCounter::new( 29 | "block_hash_calc_cost", 30 | "block hash calc cost for bloom" 31 | ) 32 | .unwrap(), 33 | }; 34 | } 35 | 36 | #[derive(Debug)] 37 | pub struct EvMetrics { 38 | pub lsm_size: IntGaugeVec, 39 | pub vlog_size: IntGauge, 40 | pub pending_writes: IntGauge, 41 | 42 | /// These are cumulative 43 | pub num_reads: IntCounter, 44 | pub num_writes: IntCounter, 45 | pub num_bytes_read: IntCounter, 46 | pub num_bytes_written: IntCounter, 47 | pub num_lsm_gets: IntCounter, 48 | pub num_lsm_bloom_hits: IntCounter, 49 | pub num_gets: IntCounter, 50 | pub num_puts: IntCounter, 51 | pub num_blocked_puts: IntCounter, 52 | /// number of the memtable gets 53 | pub num_mem_tables_gets: IntCounter, 54 | pub block_hash_calc_cost: IntCounter, 55 | } 56 | 57 | impl fmt::Display for EvMetrics { 58 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 59 | use tabled::{Table, Tabled}; 60 | 61 | #[derive(Tabled)] 62 | struct KeyPair { 63 | label: String, 64 | value: String, 65 | } 66 | let mut kv = vec![]; 67 | kv.push(KeyPair { 68 | label: "num_reads".to_owned(), 69 | value: self.num_reads.get().to_string(), 70 | }); 71 | kv.push(KeyPair { 72 | label: "num_writes".to_owned(), 73 | value: self.num_writes.get().to_string(), 74 | }); 75 | kv.push(KeyPair { 76 | label: 
"num_bytes_read".to_owned(), 77 | value: self.num_bytes_read.get().to_string(), 78 | }); 79 | kv.push(KeyPair { 80 | label: "num_bytes_written".to_owned(), 81 | value: self.num_bytes_written.get().to_string(), 82 | }); 83 | kv.push(KeyPair { 84 | label: "num_lsm_gets".to_owned(), 85 | value: self.num_lsm_gets.get().to_string(), 86 | }); 87 | kv.push(KeyPair { 88 | label: "num_lsm_bloom_hits".to_owned(), 89 | value: self.num_lsm_bloom_hits.get().to_string(), 90 | }); 91 | kv.push(KeyPair { 92 | label: "num_gets".to_owned(), 93 | value: self.num_gets.get().to_string(), 94 | }); 95 | kv.push(KeyPair { 96 | label: "num_puts".to_owned(), 97 | value: self.num_puts.get().to_string(), 98 | }); 99 | kv.push(KeyPair { 100 | label: "num_blocked_puts".to_owned(), 101 | value: self.num_blocked_puts.get().to_string(), 102 | }); 103 | kv.push(KeyPair { 104 | label: "num_mem_tables_gets".to_owned(), 105 | value: self.num_mem_tables_gets.get().to_string(), 106 | }); 107 | kv.push(KeyPair { 108 | label: "block_hash_calc_cost".to_owned(), 109 | value: self.block_hash_calc_cost.get().to_string(), 110 | }); 111 | let table_str = Table::new(kv).to_string(); 112 | f.write_str(&table_str) 113 | } 114 | } 115 | 116 | pub fn get_metrics() -> &'static EvMetrics { 117 | &EV 118 | } 119 | -------------------------------------------------------------------------------- /src/iterator.rs: -------------------------------------------------------------------------------- 1 | use crate::iterator::PreFetchStatus::Prefetched; 2 | use crate::kv::_BADGER_PREFIX; 3 | use crate::types::{ArcRW, Channel, Closer, TArcMx, TArcRW}; 4 | use crate::{hex_str, ValueStruct, KV}; 5 | use crate::{ 6 | value_log::{MetaBit, ValuePointer}, 7 | Decode, MergeIterator, Result, Xiterator, EMPTY_SLICE, 8 | }; 9 | 10 | use atomic::Atomic; 11 | 12 | use std::fmt::{Debug, Display, Formatter, Pointer}; 13 | use std::future::Future; 14 | 15 | use std::pin::{pin, Pin}; 16 | 17 | use std::sync::atomic::Ordering; 18 | use std::sync::Arc; 19 | use std::{io::Cursor, sync::atomic::AtomicU64}; 20 | use tokio::io::AsyncWriteExt; 21 | use tokio::sync::{RwLockReadGuard, RwLockWriteGuard}; 22 | 23 | #[derive(Debug, PartialEq, Copy, Clone)] 24 | pub(crate) enum PreFetchStatus { 25 | Empty, 26 | Prefetched, 27 | } 28 | 29 | #[derive(Clone, Debug)] 30 | pub struct KVItem { 31 | inner: TArcRW, 32 | } 33 | 34 | impl From for KVItem { 35 | fn from(value: KVItemInner) -> Self { 36 | Self { 37 | inner: TArcRW::new(tokio::sync::RwLock::new(value)), 38 | } 39 | } 40 | } 41 | // impl Deref for KVItem { 42 | // type Target = tokio::sync::RwLock; 43 | // 44 | // fn deref(&self) -> &Self::Target { 45 | // self.inner.as_ref() 46 | // } 47 | // } 48 | 49 | impl KVItem { 50 | pub async fn key(&self) -> Vec { 51 | let inner = self.rl().await; 52 | inner.key().to_vec() 53 | } 54 | 55 | pub async fn value(&self) -> Result> { 56 | let inner = self.rl().await; 57 | inner.get_value().await 58 | } 59 | 60 | pub async fn has_value(&self) -> bool { 61 | let inner = self.rl().await; 62 | inner.has_value() 63 | } 64 | 65 | pub async fn counter(&self) -> u64 { 66 | let inner = self.rl().await; 67 | inner.counter() 68 | } 69 | 70 | pub async fn user_meta(&self) -> u8 { 71 | let inner = self.rl().await; 72 | inner.user_meta() 73 | } 74 | 75 | pub(crate) async fn rl(&self) -> RwLockReadGuard<'_, KVItemInner> { 76 | self.inner.read().await 77 | } 78 | 79 | pub(crate) async fn wl(&self) -> RwLockWriteGuard<'_, KVItemInner> { 80 | self.inner.write().await 81 | } 82 | } 83 | 84 | // Returned during 
iteration. Both the key() and value() output is only valid until 85 | // iterator.next() is called. 86 | #[derive(Clone)] 87 | pub(crate) struct KVItemInner { 88 | status: Arc>, 89 | kv: KV, 90 | key: Vec, 91 | // TODO, Opz memory 92 | vptr: Vec, 93 | value: TArcMx>, 94 | meta: u8, 95 | user_meta: u8, 96 | cas_counter: Arc, 97 | wg: Closer, 98 | err: Result<()>, 99 | } 100 | 101 | impl Display for KVItemInner { 102 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 103 | f.debug_struct("kv") 104 | .field("key", &hex_str(&self.key)) 105 | .field("meta", &self.meta) 106 | .field("user_meta", &self.user_meta) 107 | .field("cas", &self.counter()) 108 | .finish() 109 | } 110 | } 111 | 112 | impl Debug for KVItemInner { 113 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 114 | f.debug_struct("kv") 115 | .field("key", &hex_str(&self.key)) 116 | .field("meta", &self.meta) 117 | .field("user_meta", &self.user_meta) 118 | .field("cas", &self.counter()) 119 | .finish() 120 | } 121 | } 122 | 123 | impl KVItemInner { 124 | pub(crate) fn new(key: Vec, value: ValueStruct, kv: KV) -> KVItemInner { 125 | Self { 126 | status: Arc::new(Atomic::new(PreFetchStatus::Empty)), 127 | kv, 128 | key, 129 | value: TArcMx::new(Default::default()), 130 | vptr: value.value, 131 | meta: value.meta, 132 | user_meta: value.user_meta, 133 | cas_counter: Arc::new(AtomicU64::new(value.cas_counter)), 134 | wg: Closer::new("kv".to_owned()), 135 | err: Ok(()), 136 | } 137 | } 138 | 139 | // Returns the key. Remember to copy if you need to access it outside the iteration loop. 140 | pub(crate) fn key(&self) -> &[u8] { 141 | &self.key 142 | } 143 | 144 | // Return value 145 | pub(crate) async fn get_value(&self) -> Result> { 146 | let ch = Channel::new(1); 147 | self.value(|value| { 148 | let tx = ch.tx(); 149 | let value = value.to_vec(); 150 | Box::pin(async move { 151 | tx.send(value).await.unwrap(); 152 | Ok(()) 153 | }) 154 | }) 155 | .await?; 156 | Ok(ch.recv().await.unwrap()) 157 | } 158 | 159 | // Value retrieves the value of the item from the value log. It calls the 160 | // consumer function with a slice argument representing the value. In case 161 | // of error, the consumer function is not called. 162 | // 163 | // Note that the call to the consumer func happens synchronously. 164 | pub(crate) async fn value( 165 | &self, 166 | mut consumer: impl FnMut(&[u8]) -> Pin> + Send>>, 167 | ) -> Result<()> { 168 | // Wait result 169 | self.wg.wait().await; 170 | if self.status.load(Ordering::Acquire) == Prefetched { 171 | if self.err.is_err() { 172 | return self.err.clone(); 173 | } 174 | let value = self.value.lock().await; 175 | return if value.is_empty() { 176 | consumer(&EMPTY_SLICE).await 177 | } else { 178 | consumer(&value).await 179 | }; 180 | } 181 | return self.kv.yield_item_value(self.clone(), consumer).await; 182 | } 183 | 184 | pub(crate) fn has_value(&self) -> bool { 185 | if self.meta == 0 && self.vptr.is_empty() { 186 | return false; 187 | } 188 | if (self.meta & MetaBit::BIT_DELETE.bits()) > 0 { 189 | return false; 190 | } 191 | true 192 | } 193 | 194 | // async fetch value from value_log. 
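// A consumer-style call sketch for the `value` API above (illustrative only; the closure
// shape follows the signature declared in this file):
//     inner.value(|bytes| {
//         let owned = bytes.to_vec();
//         Box::pin(async move {
//             // use `owned` without holding on to the borrowed slice
//             Ok(())
//         })
//     })
//     .await?;
// Callers that just want an owned Vec<u8> can use `get_value`, which wraps this pattern
// with a one-slot channel; `pre_fetch_value` below reuses the same mechanism to cache the
// bytes ahead of iteration.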
195 | pub(crate) async fn pre_fetch_value(&self) -> Result<()> { 196 | let kv = self.kv.clone(); 197 | kv.yield_item_value(self.clone(), |value| { 198 | let status_wl = self.status.clone(); 199 | let value = value.to_vec(); 200 | let value_wl = self.value.clone(); 201 | Box::pin(async move { 202 | status_wl.store(Prefetched, Ordering::Release); 203 | if value.is_empty() { 204 | return Ok(()); 205 | } 206 | let mut value_wl = value_wl.lock().await; 207 | *value_wl = value; 208 | Ok(()) 209 | }) 210 | }) 211 | .await 212 | } 213 | 214 | // Returns approximate size of the key-value pair. 215 | // 216 | // This can be called while iterating through a store to quickly estimate the 217 | // size of a range of key-value pairs (without fetching the corresponding) 218 | // values). 219 | pub(crate) fn estimated_size(&self) -> u64 { 220 | if !self.has_value() { 221 | return 0; 222 | } 223 | if self.meta & MetaBit::BIT_VALUE_POINTER.bits() == 0 { 224 | return (self.key.len() + self.vptr.len()) as u64; 225 | } 226 | let mut vpt = ValuePointer::default(); 227 | vpt.dec(&mut Cursor::new(&self.vptr)).unwrap(); 228 | vpt.len as u64 // includes key length 229 | } 230 | 231 | // Returns the CAS counter associated with the value. 232 | pub(crate) fn counter(&self) -> u64 { 233 | self.cas_counter.load(atomic::Ordering::Acquire) 234 | } 235 | 236 | // Returns the user_meta set by the user. Typically, this byte, optionally set by the user 237 | // is used to interpret the value. 238 | pub(crate) fn user_meta(&self) -> u8 { 239 | self.user_meta 240 | } 241 | 242 | pub(crate) fn meta(&self) -> u8 { 243 | self.meta 244 | } 245 | 246 | pub(crate) fn vptr(&self) -> &[u8] { 247 | &self.vptr 248 | } 249 | } 250 | 251 | // Used to set options when iterating over Badger key-value stores. 252 | #[derive(Debug, Clone, Copy)] 253 | pub struct IteratorOptions { 254 | // Indicates whether we should prefetch values during iteration and store them. 255 | pub(crate) pre_fetch_values: bool, 256 | // How may KV pairs to prefetch while iterating. Valid only if PrefetchValues is true. 257 | pub(crate) pre_fetch_size: isize, 258 | // Direction of iteration. False is forward, true is backward. 259 | pub(crate) reverse: bool, 260 | } 261 | 262 | impl Default for IteratorOptions { 263 | fn default() -> Self { 264 | DEF_ITERATOR_OPTIONS 265 | } 266 | } 267 | 268 | impl IteratorOptions { 269 | pub fn new(pre_fetch_values: bool, pre_fetch_size: isize, reverse: bool) -> Self { 270 | IteratorOptions { 271 | pre_fetch_values, 272 | pre_fetch_size, 273 | reverse, 274 | } 275 | } 276 | } 277 | 278 | pub(crate) const DEF_ITERATOR_OPTIONS: IteratorOptions = IteratorOptions { 279 | pre_fetch_size: 100, 280 | pre_fetch_values: true, 281 | reverse: false, 282 | }; 283 | 284 | /// Helps iterating over the KV pairs in a lexicographically sorted order. 
285 | /// skiplist, sst vlog 286 | /// | | | 287 | /// | | | 288 | /// IteratorExt reference 289 | pub struct IteratorExt { 290 | kv: KV, 291 | itr: MergeIterator, 292 | opt: IteratorOptions, 293 | item: ArcRW>, 294 | // Cache the prefetch keys, not inlcude current value 295 | data: ArcRW>, 296 | has_rewind: ArcRW, 297 | } 298 | 299 | /// TODO FIXME 300 | // impl futures_core::Stream for IteratorExt { 301 | // type Item = KVItem; 302 | // 303 | // fn poll_next( 304 | // mut self: Pin<&mut Self>, 305 | // cx: &mut std::task::Context<'_>, 306 | // ) -> std::task::Poll> { 307 | // let mut has_rewind = self.has_rewind.write(); 308 | // if !*has_rewind { 309 | // *has_rewind = true; 310 | // match Pin::new(&mut pin!(self.rewind())).poll(cx) { 311 | // std::task::Poll::Pending => { 312 | // warn!("<<<>>>>"); 313 | // std::task::Poll::Pending 314 | // } 315 | // std::task::Poll::Ready(None) => std::task::Poll::Ready(None), 316 | // std::task::Poll::Ready(t) => std::task::Poll::Ready(t), 317 | // } 318 | // } else { 319 | // match Pin::new(&mut pin!(self.next())).poll(cx) { 320 | // std::task::Poll::Pending => { 321 | // warn!("<<<>>>>"); 322 | // std::task::Poll::Pending 323 | // } 324 | // std::task::Poll::Ready(None) => std::task::Poll::Ready(None), 325 | // std::task::Poll::Ready(t) => std::task::Poll::Ready(t), 326 | // } 327 | // } 328 | // } 329 | // } 330 | 331 | impl IteratorExt { 332 | pub(crate) fn new(kv: KV, itr: MergeIterator, opt: IteratorOptions) -> IteratorExt { 333 | IteratorExt { 334 | kv, 335 | opt, 336 | itr, 337 | data: ArcRW::default(), 338 | item: Arc::new(Default::default()), 339 | has_rewind: ArcRW::default(), 340 | } 341 | } 342 | 343 | // pub(crate) async fn new_async_iterator( 344 | // kv: KV, 345 | // itr: MergeIterator, 346 | // opt: IteratorOptions, 347 | // ) -> Box> { 348 | // let itr = Self::new(kv, itr, opt); 349 | // Box::new(itr) 350 | // } 351 | 352 | // Seek to the provided key if present. If absent, if would seek to the next smallest key 353 | // greater than provided if iterating in the forward direction. Behavior would be reversed is 354 | // iterating backwards. 355 | pub async fn seek(&self, key: &[u8]) -> Option { 356 | while let Some(el) = self.data.write().pop_front() { 357 | el.rl().await.wg.wait().await; 358 | } 359 | while let Some(el) = self.itr.seek(key) { 360 | if el.key().starts_with(_BADGER_PREFIX) { 361 | continue; 362 | } 363 | break; 364 | } 365 | self.pre_fetch().await; 366 | self.item.read().clone() 367 | } 368 | 369 | // Rewind the iterator cursor all the wy to zero-th position, which would be the 370 | // smallest key if iterating forward, and largest if iterating backward. It dows not 371 | // keep track of whether the cursor started with a `seek`. 372 | pub async fn rewind(&self) -> Option { 373 | while let Some(el) = self.data.write().pop_front() { 374 | // Just cleaner to wait before pushing. No ref counting need. 375 | el.rl().await.wg.wait().await; 376 | } 377 | // rewind the iterator 378 | // rewind, next, rewind?, thie item is who! 379 | let mut item = self.itr.rewind(); 380 | // filter internal data 381 | while item.is_some() && item.as_ref().unwrap().key().starts_with(_BADGER_PREFIX) { 382 | item = self.itr.next(); 383 | } 384 | // Before every rewind, the item will be reset to None 385 | self.item.write().take(); 386 | // prefetch item. 387 | self.pre_fetch().await; 388 | // return the first el. 
389 | self.item.read().clone() 390 | } 391 | 392 | // Advance the iterator by one (*NOTICE*: must be rewind when you call self.next()) 393 | pub async fn next(&self) -> Option { 394 | // Ensure current item has load 395 | if let Some(el) = self.item.write().take() { 396 | el.rl().await.wg.wait().await; // Just cleaner to wait before pushing to avoid doing ref counting. 397 | } 398 | // Set next item to current 399 | if let Some(el) = self.data.write().pop_front() { 400 | self.item.write().replace(el); 401 | } 402 | // Advance internal iterator until entry is not deleted 403 | while let Some(el) = self.itr.next() { 404 | if el.key().starts_with(_BADGER_PREFIX) { 405 | continue; 406 | } 407 | if el.value().meta & MetaBit::BIT_DELETE.bits() == 0 { 408 | // Not deleted 409 | break; 410 | } 411 | } 412 | let item = self.itr.peek(); 413 | if item.is_none() { 414 | return None; 415 | } 416 | 417 | let xitem = self.new_item(); 418 | self.fill(xitem.clone()).await; 419 | self.data.write().push_back(xitem.clone()); 420 | Some(xitem) 421 | } 422 | 423 | pub async fn peek(&self) -> Option { 424 | self.item.read().clone() 425 | } 426 | } 427 | 428 | impl IteratorExt { 429 | // Returns false when iteration is done 430 | // or when the current key is not prefixed by the specified prefix. 431 | async fn valid_for_prefix(&self, prefix: &[u8]) -> bool { 432 | self.item.read().is_some() 433 | && self 434 | .item 435 | .read() 436 | .as_ref() 437 | .unwrap() 438 | .rl() 439 | .await 440 | .key() 441 | .starts_with(prefix) 442 | } 443 | 444 | // Close the iterator, It is important to call this when you're done with iteration. 445 | pub async fn close(&self) -> Result<()> { 446 | // TODO: We could handle this error. 447 | self.kv.vlog.as_ref().unwrap().decr_iterator_count().await?; 448 | Ok(()) 449 | } 450 | 451 | // fill the value 452 | async fn fill(&self, item: KVItem) { 453 | let vs = self.itr.peek().unwrap(); 454 | let vs = vs.value(); 455 | { 456 | let mut item = item.wl().await; 457 | item.meta = vs.meta; 458 | item.user_meta = vs.user_meta; 459 | item.cas_counter.store(vs.cas_counter, Ordering::Release); 460 | item.key.extend(self.itr.peek().as_ref().unwrap().key()); 461 | item.vptr.extend(&vs.value); 462 | item.value.lock().await.clear(); 463 | } 464 | 465 | // need fetch value, use new coroutine to load value. 466 | if self.opt.pre_fetch_values { 467 | item.rl().await.wg.add_running(1); 468 | tokio::spawn(async move { 469 | // FIXME we are not handling errors here. 470 | { 471 | let item = item.rl().await; 472 | if let Err(err) = item.pre_fetch_value().await { 473 | log::error!("Failed to fetch value, {}", err); 474 | } 475 | } 476 | item.rl().await.wg.done(); 477 | }); 478 | } 479 | } 480 | 481 | // Prefetch load items. 
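// A rough iteration sketch showing where prefetching happens (how the iterator is obtained
// from KV is hypothetical; rewind/next/close/key/value are the methods defined here):
//     let itr: IteratorExt = /* built from a KV, a MergeIterator and IteratorOptions */;
//     let mut item = itr.rewind().await; // triggers pre_fetch below
//     while let Some(it) = item {
//         let _key = it.key().await;
//         let _val = it.value().await?;
//         item = itr.next().await; // pops the next prefetched KVItem
//     }
//     itr.close().await?;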
482 | async fn pre_fetch(&self) { 483 | let mut pre_fetch_size = 2; 484 | if self.opt.pre_fetch_values && self.opt.pre_fetch_size > 1 { 485 | pre_fetch_size = self.opt.pre_fetch_size; 486 | } 487 | 488 | let itr = &self.itr; 489 | let mut count = 0; 490 | while let Some(item) = itr.peek() { 491 | if item.key().starts_with(crate::kv::_BADGER_PREFIX) { 492 | itr.next(); 493 | continue; 494 | } 495 | if item.value().meta & MetaBit::BIT_DELETE.bits() > 0 { 496 | itr.next(); 497 | continue; 498 | } 499 | count += 1; 500 | let xitem = self.new_item(); 501 | // fill a el from itr.peek 502 | self.fill(xitem.clone()).await; 503 | if self.item.read().is_none() { 504 | self.item.write().replace(xitem); // store it 505 | } else { 506 | // push prefetch el into cache queue, Notice it not including current item 507 | self.data.write().push_back(xitem); 508 | } 509 | if count == pre_fetch_size { 510 | break; 511 | } 512 | itr.next(); 513 | } 514 | } 515 | 516 | fn new_item(&self) -> KVItem { 517 | let inner_item = KVItemInner { 518 | status: Arc::new(Atomic::new(PreFetchStatus::Empty)), 519 | kv: self.kv.clone(), 520 | key: vec![], 521 | value: TArcMx::new(Default::default()), 522 | vptr: vec![], 523 | meta: 0, 524 | user_meta: 0, 525 | cas_counter: Arc::new(Default::default()), 526 | wg: Closer::new("IteratorExt".to_owned()), 527 | err: Ok(()), 528 | }; 529 | return KVItem::from(inner_item); 530 | } 531 | 532 | // Returns false when iteration is done. 533 | fn valid(&self) -> bool { 534 | self.item.read().is_some() 535 | } 536 | } 537 | -------------------------------------------------------------------------------- /src/level_handler.rs: -------------------------------------------------------------------------------- 1 | use crate::compaction::KeyRange; 2 | 3 | use crate::table::iterator::{IteratorImpl, IteratorItem}; 4 | use crate::table::table::Table; 5 | use crate::types::XArc; 6 | 7 | use crate::{event, hex_str, Result}; 8 | use core::slice::SlicePattern; 9 | use std::fmt::Display; 10 | 11 | use crate::options::Options; 12 | 13 | use drop_cell::defer; 14 | use log::{debug, info, warn}; 15 | use parking_lot::lock_api::{RwLockReadGuard, RwLockWriteGuard}; 16 | use parking_lot::{RawRwLock, RwLock}; 17 | use std::collections::HashSet; 18 | 19 | use std::sync::atomic::{AtomicI32, AtomicU64, AtomicUsize, Ordering}; 20 | use std::sync::Arc; 21 | 22 | pub(crate) type LevelHandler = XArc; 23 | 24 | impl From for LevelHandler { 25 | fn from(value: LevelHandlerInner) -> Self { 26 | XArc::new(value) 27 | } 28 | } 29 | 30 | impl LevelHandler { 31 | // Check does some sanity check on one level of data or in-memory index. 32 | pub(crate) fn validate(&self) -> Result<()> { 33 | self.lock_exclusive(); 34 | defer! 
{self.unlock_exclusive();} 35 | if self.level() == 0 { 36 | return Ok(()); 37 | } 38 | let tables = self.tables.write(); 39 | let num_tables = tables.len(); 40 | for j in 1..num_tables { 41 | if j >= tables.len() { 42 | return Err(format!( 43 | "Level={}, j={}, number_tables={}", 44 | self.level(), 45 | j, 46 | num_tables 47 | ) 48 | .into()); 49 | } 50 | 51 | // overlap occurs 52 | if tables[j - 1].biggest() >= tables[j].smallest() { 53 | return Err(format!( 54 | "Inter: {} vs {}: level={} j={} numTables={}", 55 | hex_str(tables[j - 1].biggest()), 56 | hex_str(tables[j].smallest()), 57 | self.level(), 58 | j, 59 | num_tables 60 | ) 61 | .into()); 62 | } 63 | if tables[j].smallest() > tables[j].biggest() { 64 | return Err(format!( 65 | "Intra: {} vs {}: level={} j={} numTables={}", 66 | hex_str(tables[j].smallest()), 67 | hex_str(tables[j].biggest()), 68 | self.level(), 69 | j, 70 | num_tables 71 | ) 72 | .into()); 73 | } 74 | } 75 | 76 | Ok(()) 77 | } 78 | 79 | // Returns true if the non-zero level may be compacted. *del_size* provides the size of the tables 80 | // which are currently being compacted so that we treat them as already having started being 81 | // compacted (because they have been, yet their size is already counted in get_total_size). 82 | pub(crate) fn is_compactable(&self, del_size: u64) -> bool { 83 | let compactable = self.get_total_size() - del_size >= self.get_max_total_size(); 84 | 85 | #[cfg(test)] 86 | debug!( 87 | "trace level{}, does it compactable, total_size:{}, del_size:{}, max_size:{}, yes: {}", 88 | self.level(), 89 | self.get_total_size(), 90 | del_size, 91 | self.get_max_total_size(), 92 | compactable, 93 | ); 94 | 95 | compactable 96 | } 97 | 98 | pub(crate) fn get_total_size(&self) -> u64 { 99 | self.total_size.load(Ordering::Relaxed) 100 | } 101 | 102 | pub(crate) fn incr_total_size(&self, n: u64) { 103 | let old = self.total_size.fetch_add(n, Ordering::Relaxed); 104 | #[cfg(test)] 105 | info!( 106 | "incr level{} total size: {} => {}", 107 | self.level(), 108 | old, 109 | self.get_total_size() 110 | ); 111 | } 112 | 113 | pub(crate) fn decr_total_size(&self, n: u64) { 114 | let old = self.total_size.fetch_sub(n, Ordering::Relaxed); 115 | #[cfg(test)] 116 | info!( 117 | "decr level{} total size: {} => {}", 118 | self.level(), 119 | old, 120 | self.get_total_size() 121 | ); 122 | } 123 | 124 | pub(crate) fn get_max_total_size(&self) -> u64 { 125 | self.max_total_size.load(Ordering::Relaxed) 126 | } 127 | 128 | // delete current level's tables of to_del 129 | pub(crate) fn delete_tables(&self, to_del: Vec) { 130 | let to_del_set = to_del.iter().map(|id| *id).collect::>(); 131 | let level = self.level(); 132 | let mut tb_wl = self.tables_wl(); 133 | let before_tids = tb_wl.iter().map(|tb| tb.id()).collect::>(); 134 | { 135 | tb_wl.retain_mut(|tb| { 136 | if to_del_set.contains(&tb.id()) { 137 | // delete table reference 138 | tb.decr_ref(); 139 | self.decr_total_size(tb.size() as u64); 140 | return false; 141 | } 142 | true 143 | }); 144 | } 145 | let after_tids = tb_wl.iter().map(|tb| tb.id()).collect::>(); 146 | warn!( 147 | "after delete tables level:{}, {:?} => {:?}, to_del: {:?}", 148 | level, before_tids, after_tids, to_del, 149 | ); 150 | } 151 | 152 | // init with tables 153 | pub(crate) fn init_tables(&self, tables: Vec
) { 154 | let total_size = tables.iter().fold(0, |acc, table| acc + table.size()); 155 | self.total_size.store(total_size as u64, Ordering::Relaxed); 156 | let mut tb_wl = self.tables_wl(); 157 | (*tb_wl) = tables; 158 | if self.level() == 0 { 159 | // key range will overlap. Just sort by file_id in ascending order 160 | // because newer tables are at the end of level 0. 161 | tb_wl.sort_by_key(|tb| tb.id()); 162 | } else { 163 | // Sort tables by keys. 164 | tb_wl.sort_by_key(|tb| tb.smallest().to_vec()); 165 | } 166 | } 167 | 168 | // Get table write lock guards. 169 | fn tables_wl(&self) -> RwLockWriteGuard<'_, RawRwLock, Vec
> { 170 | self.tables.write() 171 | } 172 | 173 | // Get table read lock guards 174 | fn tables_rd(&self) -> RwLockReadGuard<'_, RawRwLock, Vec
> { 175 | self.tables.read() 176 | } 177 | 178 | pub(crate) fn debug_tables(&self) { 179 | let tw = self.tables_rd(); 180 | info!( 181 | "=============debug tables, level: {}=====================", 182 | self.level() 183 | ); 184 | for tb in tw.iter() { 185 | info!( 186 | "|tid:{}, smallest:{}, biggest:{}, size: {}|", 187 | tb.id(), 188 | hex_str(tb.smallest()), 189 | hex_str(tb.biggest()), 190 | tb.size(), 191 | ); 192 | } 193 | info!("------------------------end-----------------------------"); 194 | } 195 | 196 | // Returns the tables that intersect with key range. Returns a half-interval [left, right). 197 | // This function should already have acquired a read lock, and this is so important the caller must 198 | // pass an empty parameter declaring such. 199 | pub(crate) fn overlapping_tables(&self, key_range: &KeyRange) -> (usize, usize) { 200 | // probe.biggest() >= left 201 | let left = self 202 | .tables_rd() 203 | .binary_search_by(|probe| probe.biggest().cmp(&key_range.left)); 204 | let right = self 205 | .tables_rd() 206 | .binary_search_by(|probe| probe.smallest().cmp(&key_range.right)); 207 | 208 | info!( 209 | "overlapping tables, range: {}, left: {:?}, right: {:?}", 210 | key_range, left, right 211 | ); 212 | let left = left.unwrap_or_else(|n| n); 213 | let right = right.map(|n| n + 1).unwrap_or_else(|n| n); 214 | if left == right { 215 | // simple handle 216 | return (0, 0); 217 | } 218 | (left, right) 219 | } 220 | 221 | pub(crate) fn get_total_siz(&self) -> u64 { 222 | self.total_size.load(Ordering::Relaxed) 223 | } 224 | 225 | // Replace tables[left:right] with new_tables, Note this EXCLUDES tables[right]. 226 | // You must be call decr() to delete the old tables _after_ writing the update to the manifest. 227 | pub(crate) fn replace_tables(&self, new_tables: Vec
) -> Result<()> { 228 | // Need to re-search the range of tables in this level to be replaced as other goroutines might 229 | // be changing it as well. (They can't touch our tables, but if they add/remove other tables, 230 | // the indices get shifted around.) 231 | if new_tables.is_empty() { 232 | info!("No tables need to replace"); 233 | return Ok(()); 234 | } 235 | // TODO Add lock (think of level's sharing lock) 236 | // Increase total_size first. 237 | for tb in &new_tables { 238 | self.incr_total_size(tb.size() as u64); 239 | // add table reference 240 | tb.incr_ref(); 241 | } 242 | let key_range = KeyRange { 243 | left: new_tables.first().unwrap().smallest().to_vec(), 244 | right: new_tables.last().unwrap().biggest().to_vec(), 245 | inf: false, 246 | }; 247 | 248 | // TODO Opz code 249 | { 250 | let level_id = self.level(); 251 | let mut tables_lck = self.tables_wl(); 252 | let old_ids = tables_lck.iter().map(|tb| tb.id()).collect::>(); 253 | // TODO FIXME may be it is error. 254 | tables_lck.retain_mut(|tb| { 255 | let left = tb.biggest() <= key_range.left.as_slice(); 256 | let right = tb.smallest() > key_range.right.as_slice(); 257 | if left || right { 258 | return true; 259 | } else { 260 | // TODO it should be not a good idea decr reference here, slow lock 261 | // decr table reference 262 | tb.decr_ref(); 263 | self.decr_total_size(tb.size() as u64); 264 | false 265 | } 266 | }); 267 | let will_add = new_tables.iter().map(|tb| tb.id()).collect::>(); 268 | tables_lck.extend(new_tables); 269 | // TODO avoid resort 270 | tables_lck.sort_by(|a, b| a.smallest().cmp(b.smallest())); 271 | 272 | let new_ids = tables_lck.iter().map(|tb| tb.id()).collect::>(); 273 | info!( 274 | "after replace tables, level:{}, will_add:{:?}, {:?} => {:?}", 275 | level_id, will_add, old_ids, new_ids 276 | ); 277 | } 278 | Ok(()) 279 | } 280 | 281 | // Return true if ok and no stalling that will hold a new table reference 282 | pub(crate) async fn try_add_level0_table(&self, t: Table) -> bool { 283 | assert_eq!(self.get_level(), 0); 284 | let mut tw = self.tables_wl(); 285 | if tw.len() >= self.opt.num_level_zero_tables_stall { 286 | // Too many tables at zero level need compact 287 | return false; 288 | } 289 | t.incr_ref(); 290 | self.incr_total_size(t.size() as u64); 291 | tw.push(t); 292 | true 293 | } 294 | 295 | pub(crate) fn num_tables(&self) -> usize { 296 | self.tables_rd().len() 297 | } 298 | 299 | // Must be call only once 300 | pub(crate) fn close(&self) -> Result<()> { 301 | let tw = self.tables_wl(); 302 | tw.iter().for_each(|tb| tb.decr_ref()); 303 | Ok(()) 304 | } 305 | 306 | // Acquires a read-lock to access s.tables. It returns a list of table_handlers. 307 | pub(crate) fn get_table_for_key(&self, key: &[u8]) -> Option { 308 | return if self.get_level() == 0 { 309 | // For level 0, we need to check every table. Remember to make a copy as self.tables may change 310 | // once we exit this function, and we don't want to lock the self.tables while seeking in tabbles. 311 | // CAUTION: Reverse the tables. 
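// Level 0 tables are kept sorted by file id in ascending order (see init_tables),
// so the newest table sits at the back of the vector. Scanning in reverse below
// therefore visits newer tables before older ones, letting the most recent
// version of a key win the lookup.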
312 | let tw = self.tables_rd(); 313 | for tb in tw.iter().rev() { 314 | tb.incr_ref(); 315 | // check it by bloom filter 316 | if tb.does_not_have(key) { 317 | //debug!("not contain it, key #{}, st: {}", hex_str(key), tb.id()); 318 | event::get_metrics().num_lsm_bloom_hits.inc(); 319 | tb.decr_ref(); 320 | continue; 321 | } 322 | event::get_metrics().num_lsm_gets.inc(); 323 | let it = IteratorImpl::new(tb.clone(), false); 324 | let item = it.seek(key); 325 | tb.decr_ref(); 326 | if let Some(item) = item { 327 | if item.key() != key { 328 | continue; 329 | } 330 | return Some(item); 331 | } 332 | } 333 | None 334 | } else { 335 | //self.debug_tables(); 336 | let tw = self.tables_rd(); 337 | let ok = tw.binary_search_by(|tb| tb.biggest().cmp(key)); 338 | // #[cfg(test)] 339 | // info!("find key #{} at level{}, {:?}", hex_str(key), self.level(), ok.unwrap_or_else(|n| n)); 340 | 341 | let index = ok.unwrap_or_else(|n| n); 342 | if index >= tw.len() { 343 | // todo add metrics 344 | return None; 345 | } 346 | let tb = tw.get(index).unwrap(); 347 | tb.incr_ref(); 348 | if tb.does_not_have(key) { 349 | //debug!("not contain it, key #{}, st: {}", hex_str(key), tb.id()); 350 | event::get_metrics().num_lsm_bloom_hits.inc(); 351 | tb.decr_ref(); 352 | return None; 353 | } 354 | event::get_metrics().num_lsm_gets.inc(); 355 | let it = IteratorImpl::new(tb.clone(), false); 356 | let item = it.seek(key); 357 | tb.decr_ref(); 358 | if let Some(item) = item { 359 | if item.key() == key { 360 | return Some(item); 361 | } 362 | } 363 | return None; 364 | }; 365 | } 366 | 367 | pub(crate) fn get(&self, key: &[u8]) -> Option { 368 | self.get_table_for_key(key) 369 | } 370 | 371 | // returns current level 372 | pub(crate) fn level(&self) -> usize { 373 | self.level.load(Ordering::Relaxed) as usize 374 | } 375 | 376 | pub(crate) fn to_log(&self) -> String { 377 | format!("{}", self) 378 | } 379 | } 380 | 381 | impl Display for LevelHandler { 382 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 383 | f.debug_struct("LevelHandler") 384 | .field("level", &self.get_level()) 385 | .field("max", &self.max_total_size.load(Ordering::Relaxed)) 386 | .field( 387 | "tables", 388 | &self 389 | .tables_rd() 390 | .iter() 391 | .map(|tb| tb.id()) 392 | .collect::>(), 393 | ) 394 | .finish() 395 | } 396 | } 397 | 398 | pub(crate) struct LevelHandlerInner { 399 | // TODO this lock maybe global, not only for compacted 400 | pub(crate) self_lock: Arc>, 401 | // Guards tables, total_size. 402 | // For level >= 1, *tables* are sorted by key ranges, which do not overlap. 403 | // For level 0, *tables* are sorted by time. 404 | // For level 0, *newest* table are at the back. Compact the oldest one first, which is at the front. 405 | // TODO tables and total_size maybe should be lock with same lock. 406 | pub(crate) tables: Arc>>, 407 | pub(crate) total_size: AtomicU64, 408 | // The following are initialized once and const. 
409 | pub(crate) level: AtomicUsize, 410 | str_level: Arc, 411 | pub(crate) max_total_size: AtomicU64, 412 | opt: Options, 413 | } 414 | 415 | impl LevelHandlerInner { 416 | pub(crate) fn new(opt: Options, level: usize) -> LevelHandlerInner { 417 | LevelHandlerInner { 418 | self_lock: Arc::new(Default::default()), 419 | tables: Arc::new(Default::default()), 420 | total_size: Default::default(), 421 | level: AtomicUsize::new(level), 422 | str_level: Arc::new(format!("L{}", level)), 423 | max_total_size: Default::default(), 424 | opt, 425 | } 426 | } 427 | 428 | #[inline] 429 | pub(crate) fn get_level(&self) -> usize { 430 | self.level.load(Ordering::Acquire) 431 | } 432 | 433 | #[inline] 434 | pub(crate) fn lock_shared(&self) { 435 | use parking_lot::lock_api::RawRwLock; 436 | unsafe { self.self_lock.raw().lock_shared() } 437 | } 438 | 439 | #[inline] 440 | pub(crate) fn try_lock_share(&self) -> bool { 441 | use parking_lot::lock_api::RawRwLock; 442 | unsafe { self.self_lock.raw().try_lock_shared() } 443 | } 444 | 445 | #[inline] 446 | pub(crate) fn unlock_shared(&self) { 447 | use parking_lot::lock_api::RawRwLock; 448 | unsafe { self.self_lock.raw().unlock_shared() } 449 | } 450 | 451 | #[inline] 452 | pub(crate) fn lock_exclusive(&self) { 453 | use parking_lot::lock_api::RawRwLock; 454 | unsafe { self.self_lock.raw().lock_exclusive() } 455 | } 456 | 457 | #[inline] 458 | pub(crate) fn try_lock_exclusive(&self) -> bool { 459 | use parking_lot::lock_api::RawRwLock; 460 | unsafe { self.self_lock.raw().try_lock_exclusive() } 461 | } 462 | 463 | #[inline] 464 | pub(crate) fn unlock_exclusive(&self) { 465 | use parking_lot::lock_api::RawRwLock; 466 | unsafe { self.self_lock.raw().unlock_exclusive() } 467 | } 468 | } 469 | 470 | #[test] 471 | fn raw_lock() { 472 | let lock = LevelHandlerInner::new(Options::default(), 10); 473 | lock.lock_shared(); 474 | lock.lock_shared(); 475 | assert_eq!(false, lock.try_lock_exclusive()); 476 | lock.unlock_shared(); 477 | lock.unlock_shared(); 478 | 479 | assert_eq!(true, lock.try_lock_exclusive()); 480 | assert_eq!(false, lock.try_lock_share()); 481 | lock.unlock_exclusive(); 482 | assert_eq!(true, lock.try_lock_share()); 483 | } 484 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(async_iterator)] 2 | #![feature(pointer_byte_offsets)] 3 | #![feature(sync_unsafe_cell)] 4 | #![feature(associated_type_defaults)] 5 | #![feature(type_alias_impl_trait)] 6 | #![feature(strict_provenance_atomic_ptr)] 7 | #![feature(atomic_from_mut)] 8 | #![feature(cursor_remaining)] 9 | #![feature(pattern)] 10 | #![feature(cell_leak)] 11 | #![feature(path_file_prefix)] 12 | #![feature(fs_try_exists)] 13 | #![feature(generic_associated_types)] 14 | #![feature(unwrap_infallible)] 15 | #![feature(slice_pattern)] 16 | #![feature(slice_take)] 17 | #![feature(arc_into_inner)] 18 | #![feature(async_closure)] 19 | #![feature(let_chains)] 20 | #![feature(stmt_expr_attributes)] 21 | #![feature(backtrace_frames)] 22 | #![feature(binary_heap_into_iter_sorted)] 23 | #![feature(test)] 24 | #![feature(atomic_from_ptr, pointer_is_aligned)] 25 | 26 | 27 | /// Badger DB is an embedded keyvalue database. 
28 | /// 29 | /// Badger DB is a library written in Rust that implements a badger-go [https://github.com/dgraph-io/badger] 30 | /// bager-rs will implements all features of badger-go 31 | use std::mem::align_of; 32 | 33 | mod event; 34 | mod iterator; 35 | pub mod kv; 36 | mod level_handler; 37 | mod log_file; 38 | mod manifest; 39 | mod options; 40 | mod skl; 41 | mod table; 42 | mod types; 43 | mod value_log; 44 | #[cfg(test)] 45 | mod value_log_tests; 46 | mod y; 47 | 48 | mod compaction; 49 | // #[cfg(test)] 50 | // mod kv_test; 51 | #[cfg(test)] 52 | mod kv_test; 53 | mod levels; 54 | mod pb; 55 | mod st_manager; 56 | #[cfg(test)] 57 | mod test_util; 58 | mod backup; 59 | 60 | pub use iterator::*; 61 | pub use kv::*; 62 | pub use options::*; 63 | pub use skl::*; 64 | pub use st_manager::*; 65 | pub use y::*; 66 | 67 | #[allow(dead_code)] 68 | #[inline] 69 | pub(crate) fn must_align(ptr: *const T) { 70 | let actual = (ptr as usize) % align_of::() == 0; 71 | assert!(actual); 72 | } 73 | 74 | #[allow(dead_code)] 75 | #[inline] 76 | pub(crate) fn cals_size_with_align(sz: usize, align_sz: usize) -> usize { 77 | let size = (sz + align_sz) & !align_sz; 78 | size 79 | } -------------------------------------------------------------------------------- /src/lock.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/laohanlinux/badger-rs/00bbe70da1f4b0fe7d52ffbdf8da91867a147834/src/lock.txt -------------------------------------------------------------------------------- /src/log_file.rs: -------------------------------------------------------------------------------- 1 | use crate::types::Closer; 2 | use crate::value_log::{Entry, Header, ValuePointer}; 3 | use crate::y::{create_synced_file, Result}; 4 | use crate::y::{is_eof, Decode}; 5 | use std::env::temp_dir; 6 | 7 | use async_channel::Sender; 8 | use byteorder::{BigEndian, ReadBytesExt}; 9 | use drop_cell::defer; 10 | use either::Either; 11 | use log::{debug, info}; 12 | use memmap::{Mmap, MmapMut}; 13 | 14 | use std::fmt::{Debug, Formatter}; 15 | use std::fs::File; 16 | use std::future::Future; 17 | use std::io::{Read, Seek, SeekFrom, Write}; 18 | use std::ops::Deref; 19 | use std::pin::Pin; 20 | use std::sync::atomic::AtomicU64; 21 | 22 | use crate::event; 23 | use tokio::select; 24 | 25 | // MmapType is a Mmap and MmapMut tule 26 | pub(crate) struct MmapType(Either); 27 | 28 | impl MmapType { 29 | pub(crate) fn get_mmap(&self) -> &Mmap { 30 | match self.0 { 31 | Either::Left(ref _mmap) => _mmap, 32 | _ => panic!("It should be not happen"), 33 | } 34 | } 35 | 36 | pub(crate) fn get_mut_mmap(&self) -> &MmapMut { 37 | match self.0 { 38 | Either::Right(ref m) => m, 39 | _ => panic!("It should be not happen"), 40 | } 41 | } 42 | 43 | pub(crate) fn get_mut_mmap_ref(&mut self) -> &mut MmapMut { 44 | match self.0 { 45 | Either::Right(ref mut m) => m, 46 | _ => panic!("It should be not happen"), 47 | } 48 | } 49 | } 50 | 51 | impl Deref for MmapType { 52 | type Target = Either; 53 | 54 | fn deref(&self) -> &Self::Target { 55 | &self.0 56 | } 57 | } 58 | 59 | impl From for MmapType { 60 | fn from(value: Mmap) -> Self { 61 | Self(Either::Left(value)) 62 | } 63 | } 64 | 65 | impl From for MmapType { 66 | fn from(value: MmapMut) -> Self { 67 | Self(Either::Right(value)) 68 | } 69 | } 70 | 71 | pub(crate) struct LogFile { 72 | pub(crate) _path: Box, 73 | pub(crate) fd: Option, 74 | pub(crate) fid: u32, 75 | pub(crate) _mmap: Option, 76 | pub(crate) sz: u32, 77 | } 78 | 79 | impl Debug for 
LogFile { 80 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 81 | f.debug_struct("LogFile") 82 | .field("path", self._path.as_ref()) 83 | .field("fd", &self.fid) 84 | .field("size", &self.sz) 85 | .finish() 86 | } 87 | } 88 | 89 | impl LogFile { 90 | // async read *n* entries 91 | pub(crate) async fn read_entries( 92 | &self, 93 | offset: u32, 94 | n: usize, 95 | ) -> Result<(Vec<(Entry, ValuePointer)>, u32)> { 96 | let m = self.mmap_slice(); 97 | let mut cursor_offset = offset; 98 | let mut v = vec![]; 99 | while cursor_offset < m.len() as u32 && v.len() < n { 100 | let entry = Entry::from_slice(cursor_offset, m)?; 101 | let mut vpt = ValuePointer::default(); 102 | vpt.fid = self.fid; 103 | vpt.len = 104 | Header::encoded_size() as u32 + (entry.key.len() + entry.value.len()) as u32 + 4; 105 | vpt.offset = cursor_offset; 106 | cursor_offset += vpt.len; 107 | v.push((entry, vpt)) 108 | } 109 | Ok((v, cursor_offset)) 110 | } 111 | 112 | pub(crate) async fn async_iterate_by_offset( 113 | &self, 114 | ctx: Closer, 115 | mut offset: u32, 116 | notify: Sender<(Entry, ValuePointer)>, 117 | ) { 118 | defer! {ctx.done()} 119 | defer! {notify.close();} 120 | let has_been_close = ctx.has_been_closed(); 121 | loop { 122 | let (v, next) = self.read_entries(offset, 1).await.unwrap(); 123 | offset = next; 124 | if v.is_empty() { 125 | return; 126 | } else { 127 | // TODO batch sender 128 | for item in v { 129 | select! { 130 | _ = has_been_close.recv() => {}, 131 | _ = notify.send(item) => {}, 132 | } 133 | } 134 | } 135 | } 136 | } 137 | 138 | // async iterate from offset that must be call with thread safety 139 | pub(crate) async fn iterate_by_offset( 140 | &self, 141 | mut offset: u32, 142 | f: &mut impl for<'a> FnMut( 143 | &'a Entry, 144 | &'a ValuePointer, 145 | ) -> Pin> + 'a>>, 146 | ) -> Result<()> { 147 | loop { 148 | let (v, next) = self.read_entries(offset, 1).await?; 149 | if v.is_empty() { 150 | return Ok(()); 151 | } 152 | 153 | for (entry, vptr) in v.iter() { 154 | if !f(entry, vptr).await? { 155 | return Ok(()); 156 | } 157 | offset = next; 158 | } 159 | } 160 | } 161 | 162 | // It should be call by one thread. 
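// Usage sketch (illustrative only; the local bindings below are assumptions,
// not part of the API):
//
//     let mut count = 0u32;
//     log_file
//         .iterate(0, &mut |_entry, _vptr| {
//             count += 1;
//             // return Ok(false) from the future to stop iteration early
//             Box::pin(async move { Ok(true) })
//         })
//         .await?;
//
// The callback receives each decoded Entry together with its ValuePointer and
// its return value decides whether iteration continues.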
163 | pub(crate) async fn iterate( 164 | &mut self, 165 | offset: u32, 166 | f: &mut impl for<'a> FnMut( 167 | &'a Entry, 168 | &'a ValuePointer, 169 | ) -> Pin> + 'a>>, 170 | ) -> Result<()> { 171 | let mut fd = self.fd.as_mut().unwrap(); 172 | fd.seek(SeekFrom::Start(offset as u64))?; 173 | let mut entry = Entry::default(); 174 | let _truncate = false; // because maybe abort before write 175 | let mut record_offset = offset; 176 | loop { 177 | let mut h = Header::default(); 178 | let ok = h.dec(&mut fd); 179 | if ok.is_err() && ok.as_ref().unwrap_err().is_io_eof() { 180 | break; 181 | } 182 | // todo add truncate currenct 183 | ok?; 184 | if h.k_len as usize > entry.key.capacity() { 185 | entry.key = vec![0u8; h.k_len as usize]; 186 | } 187 | if h.v_len as usize > entry.value.capacity() { 188 | entry.value = vec![0u8; h.v_len as usize]; 189 | } 190 | entry.key.clear(); 191 | entry.value.clear(); 192 | 193 | let ok = fd.read(&mut entry.key); 194 | if is_eof(&ok) { 195 | break; 196 | } 197 | ok?; 198 | 199 | let ok = fd.read(&mut entry.value); 200 | if is_eof(&ok) { 201 | break; 202 | } 203 | ok?; 204 | entry.offset = record_offset; 205 | entry.meta = h.meta; 206 | entry.user_meta = h.user_mata; 207 | entry.cas_counter = AtomicU64::new(h.cas_counter); 208 | entry.cas_counter_check = h.cas_counter_check; 209 | let ok = fd.read_u32::(); 210 | if is_eof(&ok) { 211 | break; 212 | } 213 | let _crc = ok?; 214 | 215 | let mut vp = ValuePointer::default(); 216 | vp.len = Header::encoded_size() as u32 + h.k_len + h.v_len + 4; 217 | record_offset += vp.len; 218 | 219 | vp.offset = entry.offset; 220 | vp.fid = self.fid; 221 | 222 | let _continue = f(&entry, &vp).await?; 223 | if !_continue { 224 | break; 225 | } 226 | } 227 | 228 | // todo add truncate 229 | Ok(()) 230 | } 231 | } 232 | 233 | impl LogFile { 234 | // new LogFile with special path. 235 | pub(crate) fn new(path: &str) -> Result { 236 | let mut lf = LogFile { 237 | _path: Box::new(path.to_string()), 238 | fd: None, 239 | fid: 0, 240 | _mmap: None, 241 | sz: 0, 242 | }; 243 | lf.open_read_only()?; 244 | Ok(lf) 245 | } 246 | 247 | // open only read permission 248 | pub(crate) fn open_read_only(&mut self) -> Result<()> { 249 | let fd = std::fs::OpenOptions::new() 250 | .read(true) 251 | .open(self._path.as_ref())?; 252 | let meta = fd.metadata()?; 253 | let file_sz = meta.len(); 254 | let mut _mmap = unsafe { Mmap::map(&fd)? }; 255 | self._mmap.replace(_mmap.into()); 256 | self.fd.replace(fd); 257 | self.sz = file_sz as u32; 258 | Ok(()) 259 | } 260 | 261 | // Acquire lock on mmap if you are calling this. 262 | pub(crate) fn read(&self, p: &ValuePointer) -> Result<&[u8]> { 263 | #[cfg(test)] 264 | debug!( 265 | "ready to read bytes from mmap, {}, {:?}", 266 | self._mmap.as_ref().unwrap().is_left(), 267 | p 268 | ); 269 | let offset = p.offset; 270 | let mmp = self._mmap.as_ref().unwrap(); 271 | event::get_metrics().num_reads.inc(); 272 | event::get_metrics().num_bytes_read.inc_by(p.len as u64); 273 | // todo add metrics 274 | match mmp.0 { 275 | Either::Left(ref m) => Ok(&m.as_ref()[offset as usize..(offset + p.len) as usize]), 276 | Either::Right(ref m) => Ok(&m.as_ref()[offset as usize..(offset + p.len) as usize]), 277 | } 278 | } 279 | 280 | // Done written, reopen with read only permisson for file and mmap. 
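// The sequence below is: sync and flush the writable mmap, truncate the file to
// the final write offset, sync it again, drop the old mmap and fd, and then
// reopen the file read-only so the finished value log segment becomes immutable.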
281 | pub(crate) fn done_writing(&mut self, offset: u32) -> Result<()> { 282 | self.sync()?; 283 | let mut_mmap = self.mut_mmap(); 284 | mut_mmap.flush_async()?; 285 | self.fd.as_mut().unwrap().set_len(offset as u64)?; 286 | self.fd.as_mut().unwrap().sync_all()?; 287 | { 288 | self._mmap.take(); 289 | self.fd.take(); 290 | } 291 | self.open_read_only() 292 | } 293 | 294 | pub(crate) fn set_write(&mut self, sz: u64) -> Result<()> { 295 | self.fd.as_mut().unwrap().set_len(sz as u64)?; 296 | info!("reset file size:{}", sz); 297 | let mut _mmap = unsafe { Mmap::map(&self.fd.as_ref().unwrap())?.make_mut()? }; 298 | self._mmap.replace(MmapType(Either::Right(_mmap))); 299 | self.sz = sz as u32; 300 | Ok(()) 301 | } 302 | 303 | // return mmap slice 304 | fn mmap_slice(&self) -> &[u8] { 305 | let mmap = self._mmap.as_ref().unwrap(); 306 | match mmap.0 { 307 | Either::Left(ref _mmap) => _mmap.as_ref(), 308 | Either::Right(ref _mmap) => _mmap.as_ref(), 309 | } 310 | } 311 | 312 | // return file reference 313 | fn file_ref(&self) -> &File { 314 | self.fd.as_ref().unwrap() 315 | } 316 | 317 | pub(crate) fn mut_mmap(&mut self) -> &mut MmapMut { 318 | let mp = self._mmap.as_mut().unwrap(); 319 | mp.get_mut_mmap_ref() 320 | } 321 | 322 | pub(crate) fn write_buffer(&mut self, buffer: &[u8], offset: usize) -> Result { 323 | let wt = self.mut_mmap(); 324 | let mut wt = &mut wt[offset..]; 325 | wt.write(buffer).map_err(|err| err.into()) 326 | } 327 | 328 | fn mmap_ref(&self) -> &Mmap { 329 | self._mmap.as_ref().unwrap().get_mmap() 330 | } 331 | 332 | // You must hold lf.lock to sync() 333 | fn sync(&mut self) -> Result<()> { 334 | self.fd.as_mut().unwrap().sync_all()?; 335 | Ok(()) 336 | } 337 | } 338 | 339 | #[test] 340 | fn concurrency() { 341 | let mut lf = LogFile::new("src/test_data/vlog_file.text"); 342 | assert!(lf.is_ok(), "{:?}", lf.unwrap_err().to_string()); 343 | } 344 | 345 | #[test] 346 | fn test_mmap() { 347 | let mut fd = std::fs::OpenOptions::new() 348 | .read(true) 349 | .write(true) 350 | .open("src/test_data/vlog_file.text") 351 | .unwrap(); 352 | 353 | let _mmap = unsafe { Mmap::map(&fd).unwrap() }; 354 | println!("{}", _mmap.len()); 355 | println!("{}", _mmap.make_mut().is_err()); 356 | } 357 | 358 | #[test] 359 | fn test_write_file() { 360 | use crate::test_util; 361 | test_util::tracing_log(); 362 | use std::io::Write; 363 | 364 | let tmp_path = temp_dir().join("mmap_test.txt"); 365 | let tmp_path = tmp_path.to_str().unwrap(); 366 | std::fs::write(tmp_path, b"hellow, word").unwrap(); 367 | info!("path: {}", tmp_path); 368 | let mut vlog = LogFile::new(tmp_path).unwrap(); 369 | vlog.fd.take(); 370 | vlog.fd = Some(create_synced_file(tmp_path, true).unwrap()); 371 | info!( 372 | "{},{:?}", 373 | vlog.sz, 374 | String::from_utf8_lossy(vlog.mmap_slice()) 375 | ); 376 | vlog.set_write(1024).unwrap(); 377 | // vlog.fd.as_mut().unwrap().write_all(b"foobat").unwrap(); 378 | // vlog.fd.as_mut().unwrap().sync_all().unwrap(); 379 | // vlog.mut_mmap().flush_async().unwrap(); 380 | { 381 | let mut buffer = vlog._mmap.as_mut().unwrap(); 382 | let mut buffer = buffer.get_mut_mmap_ref(); 383 | let mut wt = buffer.as_mut(); 384 | wt.write_all(b"1234").unwrap(); 385 | } 386 | info!( 387 | "{},{:?}", 388 | vlog.sz, 389 | String::from_utf8_lossy(vlog.mmap_slice()) 390 | ); 391 | } 392 | -------------------------------------------------------------------------------- /src/options/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::skl::PtrAlign; 2 | use 
crate::value_log::Entry; 3 | use crate::y::{CAS_SIZE, META_SIZE, USER_META_SIZE}; 4 | use crate::{cals_size_with_align, Node, ValueStruct}; 5 | use rand::random; 6 | use std::env::temp_dir; 7 | 8 | /// Specifies how data in LSM table files and value log files should 9 | /// be loaded. 10 | #[derive(Debug, Clone, Copy, PartialEq)] 11 | pub enum FileLoadingMode { 12 | /// Indicates that files must be loaded using standard I/O 13 | FileIO, 14 | /// Indicates that files must be loaded into RAM 15 | LoadToRADM, 16 | /// Indicates that the file must be memory-mapped 17 | MemoryMap, 18 | } 19 | 20 | /// Params for creating DB object. 21 | #[derive(Debug, Clone)] 22 | pub struct Options { 23 | /// 1. Mandatory flags 24 | /// ------------------- 25 | /// Directory to store the data in. Should exist and be writable. 26 | pub dir: Box, 27 | /// Directory to store the value log in. Can be the same as Dir. Should 28 | /// exist and be writable. 29 | pub value_dir: Box, 30 | /// 2. Frequently modified flags 31 | /// ----------------------------- 32 | /// Sync all writes to disk. Setting this to true would slow down data 33 | /// loading significantly. 34 | pub sync_writes: bool, 35 | /// How should LSM tree be accessed. 36 | pub table_loading_mode: FileLoadingMode, 37 | /// 3. Flags that user might want to review 38 | /// ---------------------------------------- 39 | /// The following affect all levels of LSM tree. 40 | /// Each table (or file) is at most this size. 41 | pub max_table_size: u64, 42 | /// Equals SizeOf(Li+1)/SizeOf(Li). 43 | pub level_size_multiplier: u64, 44 | /// Maximum number of levels of compaction. 45 | pub max_levels: usize, 46 | /// If value size >= this threshold, only store value offsets in tree. 47 | pub value_threshold: usize, 48 | /// Maximum number of tables to keep in memory, before stalling. 49 | pub num_mem_tables: usize, 50 | /// The following affect how we handle LSM tree L0. 51 | /// Maximum number of Level 0 tables before we start compacting. 52 | pub num_level_zero_tables: usize, 53 | 54 | /// If we hit this number of Level 0 tables, we will stall until L0 is 55 | /// compacted away. 56 | pub num_level_zero_tables_stall: usize, 57 | 58 | /// Maximum total size for L1. 59 | pub level_one_size: u64, 60 | 61 | /// Size of single value log file. 62 | pub value_log_file_size: u64, 63 | 64 | /// Number of compaction workers to run concurrently. 65 | pub num_compactors: u64, 66 | 67 | /// 4. Flags for testing purposes 68 | /// ------------------------------ 69 | /// Stops LSM tree from compactions. 
70 | pub do_not_compact: bool, 71 | /// max entries in batch 72 | pub max_batch_count: u64, 73 | // max batch size in bytes 74 | pub max_batch_size: u64, 75 | } 76 | 77 | impl Options { 78 | // TODO FIXME 79 | pub fn estimate_size(&self, entry: &Entry) -> usize { 80 | let key_size = entry.key.len(); 81 | if entry.value.len() < self.value_threshold { 82 | key_size + entry.value.len() 83 | } else { 84 | let value_size = ValueStruct::header_size(); 85 | key_size + value_size 86 | } 87 | } 88 | 89 | /// Return the size of allocator arena 90 | pub fn arena_size(&self) -> u64 { 91 | self.max_table_size 92 | + self.max_batch_size 93 | + self.max_batch_count * (Node::size() as u64) 94 | } 95 | } 96 | 97 | impl Default for Options { 98 | fn default() -> Self { 99 | let id = random::(); 100 | Options { 101 | dir: Box::new(id.to_string()), 102 | value_dir: Box::new(id.to_string()), 103 | sync_writes: false, 104 | table_loading_mode: FileLoadingMode::LoadToRADM, 105 | max_table_size: 64 << 20, 106 | level_size_multiplier: 10, 107 | max_levels: 7, 108 | value_threshold: 20, 109 | num_mem_tables: 5, 110 | num_level_zero_tables: 5, 111 | num_level_zero_tables_stall: 10, 112 | level_one_size: 256 << 20, 113 | value_log_file_size: 1 << 30, 114 | num_compactors: 3, 115 | do_not_compact: false, 116 | max_batch_count: 200, 117 | max_batch_size: 1 << 13, 118 | } 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/pb/backup.proto: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Dgraph Labs, Inc. and Contributors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Use protos/gen.sh to generate .pb.go files. 18 | syntax = "proto3"; 19 | 20 | package protos; 21 | 22 | message KVPair { 23 | bytes key = 1; 24 | bytes value = 2; 25 | bytes userMeta = 3; 26 | } -------------------------------------------------------------------------------- /src/pb/backup.rs: -------------------------------------------------------------------------------- 1 | // This file is generated by rust-protobuf 3.0.0-alpha.2. Do not edit 2 | // .proto file is parsed by protoc 23.3 3 | // @generated 4 | 5 | // https://github.com/rust-lang/rust-clippy/issues/702 6 | #![allow(unknown_lints)] 7 | #![allow(clippy::all)] 8 | 9 | #![allow(unused_attributes)] 10 | #![cfg_attr(rustfmt, rustfmt::skip)] 11 | 12 | #![allow(box_pointers)] 13 | #![allow(dead_code)] 14 | #![allow(missing_docs)] 15 | #![allow(non_camel_case_types)] 16 | #![allow(non_snake_case)] 17 | #![allow(non_upper_case_globals)] 18 | #![allow(trivial_casts)] 19 | #![allow(unused_results)] 20 | #![allow(unused_mut)] 21 | 22 | //! Generated file from `src/pb/backup.proto` 23 | 24 | /// Generated files are compatible only with the same version 25 | /// of protobuf runtime. 
26 | const _PROTOBUF_VERSION_CHECK: () = ::protobuf::VERSION_3_0_0_ALPHA_2; 27 | 28 | #[derive(PartialEq,Clone,Default)] 29 | pub struct KVPair { 30 | // message fields 31 | pub key: ::std::vec::Vec, 32 | pub value: ::std::vec::Vec, 33 | pub userMeta: ::std::vec::Vec, 34 | // special fields 35 | pub unknown_fields: ::protobuf::UnknownFields, 36 | pub cached_size: ::protobuf::rt::CachedSize, 37 | } 38 | 39 | impl<'a> ::std::default::Default for &'a KVPair { 40 | fn default() -> &'a KVPair { 41 | ::default_instance() 42 | } 43 | } 44 | 45 | impl KVPair { 46 | pub fn new() -> KVPair { 47 | ::std::default::Default::default() 48 | } 49 | 50 | fn generated_message_descriptor_data() -> ::protobuf::reflect::GeneratedMessageDescriptorData { 51 | let mut fields = ::std::vec::Vec::new(); 52 | fields.push(::protobuf::reflect::rt::v2::make_simpler_field_accessor::<_, _>( 53 | "key", 54 | |m: &KVPair| { &m.key }, 55 | |m: &mut KVPair| { &mut m.key }, 56 | )); 57 | fields.push(::protobuf::reflect::rt::v2::make_simpler_field_accessor::<_, _>( 58 | "value", 59 | |m: &KVPair| { &m.value }, 60 | |m: &mut KVPair| { &mut m.value }, 61 | )); 62 | fields.push(::protobuf::reflect::rt::v2::make_simpler_field_accessor::<_, _>( 63 | "userMeta", 64 | |m: &KVPair| { &m.userMeta }, 65 | |m: &mut KVPair| { &mut m.userMeta }, 66 | )); 67 | ::protobuf::reflect::GeneratedMessageDescriptorData::new_2::( 68 | "KVPair", 69 | 0, 70 | fields, 71 | ) 72 | } 73 | } 74 | 75 | impl ::protobuf::Message for KVPair { 76 | fn is_initialized(&self) -> bool { 77 | true 78 | } 79 | 80 | fn merge_from(&mut self, is: &mut ::protobuf::CodedInputStream<'_>) -> ::protobuf::ProtobufResult<()> { 81 | while !is.eof()? { 82 | let (field_number, wire_type) = is.read_tag_unpack()?; 83 | match field_number { 84 | 1 => { 85 | if wire_type != ::protobuf::wire_format::WireTypeLengthDelimited { 86 | return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); 87 | } 88 | self.key = is.read_bytes()?; 89 | }, 90 | 2 => { 91 | if wire_type != ::protobuf::wire_format::WireTypeLengthDelimited { 92 | return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); 93 | } 94 | self.value = is.read_bytes()?; 95 | }, 96 | 3 => { 97 | if wire_type != ::protobuf::wire_format::WireTypeLengthDelimited { 98 | return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); 99 | } 100 | self.userMeta = is.read_bytes()?; 101 | }, 102 | _ => { 103 | ::protobuf::rt::read_unknown_or_skip_group(field_number, wire_type, is, self.mut_unknown_fields())?; 104 | }, 105 | }; 106 | } 107 | ::std::result::Result::Ok(()) 108 | } 109 | 110 | // Compute sizes of nested messages 111 | #[allow(unused_variables)] 112 | fn compute_size(&self) -> u32 { 113 | let mut my_size = 0; 114 | if !self.key.is_empty() { 115 | my_size += ::protobuf::rt::bytes_size(1, &self.key); 116 | } 117 | if !self.value.is_empty() { 118 | my_size += ::protobuf::rt::bytes_size(2, &self.value); 119 | } 120 | if !self.userMeta.is_empty() { 121 | my_size += ::protobuf::rt::bytes_size(3, &self.userMeta); 122 | } 123 | my_size += ::protobuf::rt::unknown_fields_size(self.get_unknown_fields()); 124 | self.cached_size.set(my_size); 125 | my_size 126 | } 127 | 128 | fn write_to_with_cached_sizes(&self, os: &mut ::protobuf::CodedOutputStream<'_>) -> ::protobuf::ProtobufResult<()> { 129 | if !self.key.is_empty() { 130 | os.write_bytes(1, &self.key)?; 131 | } 132 | if !self.value.is_empty() { 133 | os.write_bytes(2, &self.value)?; 134 | } 135 | if 
!self.userMeta.is_empty() { 136 | os.write_bytes(3, &self.userMeta)?; 137 | } 138 | os.write_unknown_fields(self.get_unknown_fields())?; 139 | ::std::result::Result::Ok(()) 140 | } 141 | 142 | fn get_cached_size(&self) -> u32 { 143 | self.cached_size.get() 144 | } 145 | 146 | fn get_unknown_fields(&self) -> &::protobuf::UnknownFields { 147 | &self.unknown_fields 148 | } 149 | 150 | fn mut_unknown_fields(&mut self) -> &mut ::protobuf::UnknownFields { 151 | &mut self.unknown_fields 152 | } 153 | 154 | fn new() -> KVPair { 155 | KVPair::new() 156 | } 157 | 158 | fn descriptor_static() -> ::protobuf::reflect::MessageDescriptor { 159 | ::protobuf::reflect::MessageDescriptor::new_generated_2(file_descriptor(), 0) 160 | } 161 | 162 | fn default_instance() -> &'static KVPair { 163 | static instance: KVPair = KVPair { 164 | key: ::std::vec::Vec::new(), 165 | value: ::std::vec::Vec::new(), 166 | userMeta: ::std::vec::Vec::new(), 167 | unknown_fields: ::protobuf::UnknownFields::new(), 168 | cached_size: ::protobuf::rt::CachedSize::new(), 169 | }; 170 | &instance 171 | } 172 | } 173 | 174 | impl ::protobuf::Clear for KVPair { 175 | fn clear(&mut self) { 176 | self.key.clear(); 177 | self.value.clear(); 178 | self.userMeta.clear(); 179 | self.unknown_fields.clear(); 180 | } 181 | } 182 | 183 | impl ::std::fmt::Debug for KVPair { 184 | fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { 185 | ::protobuf::text_format::fmt(self, f) 186 | } 187 | } 188 | 189 | impl ::protobuf::reflect::ProtobufValue for KVPair { 190 | type RuntimeType = ::protobuf::reflect::runtime_types::RuntimeTypeMessage; 191 | } 192 | 193 | static file_descriptor_proto_data: &'static [u8] = b"\ 194 | \n\x13src/pb/backup.proto\x12\x06protos\"L\n\x06KVPair\x12\x10\n\x03key\ 195 | \x18\x01\x20\x01(\x0cR\x03key\x12\x14\n\x05value\x18\x02\x20\x01(\x0cR\ 196 | \x05value\x12\x1a\n\x08userMeta\x18\x03\x20\x01(\x0cR\x08userMetab\x06pr\ 197 | oto3\ 198 | "; 199 | 200 | /// `FileDescriptorProto` object which was a source for this generated file 201 | pub fn file_descriptor_proto() -> &'static ::protobuf::descriptor::FileDescriptorProto { 202 | static file_descriptor_proto_lazy: ::protobuf::rt::LazyV2<::protobuf::descriptor::FileDescriptorProto> = ::protobuf::rt::LazyV2::INIT; 203 | file_descriptor_proto_lazy.get(|| { 204 | ::protobuf::Message::parse_from_bytes(file_descriptor_proto_data).unwrap() 205 | }) 206 | } 207 | 208 | /// `FileDescriptor` object which allows dynamic access to files 209 | pub fn file_descriptor() -> ::protobuf::reflect::FileDescriptor { 210 | static file_descriptor_lazy: ::protobuf::rt::LazyV2<::protobuf::reflect::GeneratedFileDescriptor> = ::protobuf::rt::LazyV2::INIT; 211 | let file_descriptor = file_descriptor_lazy.get(|| { 212 | let mut deps = ::std::vec::Vec::new(); 213 | let mut messages = ::std::vec::Vec::new(); 214 | messages.push(KVPair::generated_message_descriptor_data()); 215 | let mut enums = ::std::vec::Vec::new(); 216 | ::protobuf::reflect::GeneratedFileDescriptor::new_generated( 217 | file_descriptor_proto(), 218 | deps, 219 | messages, 220 | enums, 221 | ) 222 | }); 223 | ::protobuf::reflect::FileDescriptor::new_generated_2(file_descriptor) 224 | } 225 | -------------------------------------------------------------------------------- /src/pb/badgerpb3.proto: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2017 Dgraph Labs, Inc. 
and Contributors 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | // Use protos/gen.sh to generate .pb.go files. 18 | syntax = "proto3"; 19 | 20 | package badgerpb3; 21 | 22 | message ManifestChangeSet { 23 | // A set of changes that are applied atomically. 24 | repeated ManifestChange changes = 1; 25 | } 26 | 27 | message ManifestChange { 28 | uint64 id = 1; 29 | enum Operation { 30 | CREATE = 0; 31 | DELETE = 1; 32 | } 33 | 34 | Operation op = 2; 35 | uint32 level = 3; // Only used for CREATE 36 | } -------------------------------------------------------------------------------- /src/pb/badgerpb3.rs: -------------------------------------------------------------------------------- 1 | // This file is generated by rust-protobuf 3.0.0-alpha.2. Do not edit 2 | // .proto file is parsed by protoc 23.3 3 | // @generated 4 | 5 | // https://github.com/rust-lang/rust-clippy/issues/702 6 | #![allow(unknown_lints)] 7 | #![allow(clippy::all)] 8 | 9 | #![allow(unused_attributes)] 10 | #![cfg_attr(rustfmt, rustfmt::skip)] 11 | 12 | #![allow(box_pointers)] 13 | #![allow(dead_code)] 14 | #![allow(missing_docs)] 15 | #![allow(non_camel_case_types)] 16 | #![allow(non_snake_case)] 17 | #![allow(non_upper_case_globals)] 18 | #![allow(trivial_casts)] 19 | #![allow(unused_results)] 20 | #![allow(unused_mut)] 21 | 22 | //! Generated file from `src/pb/badgerpb3.proto` 23 | 24 | /// Generated files are compatible only with the same version 25 | /// of protobuf runtime. 26 | const _PROTOBUF_VERSION_CHECK: () = ::protobuf::VERSION_3_0_0_ALPHA_2; 27 | 28 | #[derive(PartialEq,Clone,Default)] 29 | pub struct ManifestChangeSet { 30 | // message fields 31 | pub changes: ::std::vec::Vec, 32 | // special fields 33 | pub unknown_fields: ::protobuf::UnknownFields, 34 | pub cached_size: ::protobuf::rt::CachedSize, 35 | } 36 | 37 | impl<'a> ::std::default::Default for &'a ManifestChangeSet { 38 | fn default() -> &'a ManifestChangeSet { 39 | ::default_instance() 40 | } 41 | } 42 | 43 | impl ManifestChangeSet { 44 | pub fn new() -> ManifestChangeSet { 45 | ::std::default::Default::default() 46 | } 47 | 48 | fn generated_message_descriptor_data() -> ::protobuf::reflect::GeneratedMessageDescriptorData { 49 | let mut fields = ::std::vec::Vec::new(); 50 | fields.push(::protobuf::reflect::rt::v2::make_vec_simpler_accessor::<_, _>( 51 | "changes", 52 | |m: &ManifestChangeSet| { &m.changes }, 53 | |m: &mut ManifestChangeSet| { &mut m.changes }, 54 | )); 55 | ::protobuf::reflect::GeneratedMessageDescriptorData::new_2::( 56 | "ManifestChangeSet", 57 | 0, 58 | fields, 59 | ) 60 | } 61 | } 62 | 63 | impl ::protobuf::Message for ManifestChangeSet { 64 | fn is_initialized(&self) -> bool { 65 | for v in &self.changes { 66 | if !v.is_initialized() { 67 | return false; 68 | } 69 | }; 70 | true 71 | } 72 | 73 | fn merge_from(&mut self, is: &mut ::protobuf::CodedInputStream<'_>) -> ::protobuf::ProtobufResult<()> { 74 | while !is.eof()? 
{ 75 | let (field_number, wire_type) = is.read_tag_unpack()?; 76 | match field_number { 77 | 1 => { 78 | ::protobuf::rt::read_repeated_message_into_vec(wire_type, is, &mut self.changes)?; 79 | }, 80 | _ => { 81 | ::protobuf::rt::read_unknown_or_skip_group(field_number, wire_type, is, self.mut_unknown_fields())?; 82 | }, 83 | }; 84 | } 85 | ::std::result::Result::Ok(()) 86 | } 87 | 88 | // Compute sizes of nested messages 89 | #[allow(unused_variables)] 90 | fn compute_size(&self) -> u32 { 91 | let mut my_size = 0; 92 | for value in &self.changes { 93 | let len = value.compute_size(); 94 | my_size += 1 + ::protobuf::rt::compute_raw_varint32_size(len) + len; 95 | }; 96 | my_size += ::protobuf::rt::unknown_fields_size(self.get_unknown_fields()); 97 | self.cached_size.set(my_size); 98 | my_size 99 | } 100 | 101 | fn write_to_with_cached_sizes(&self, os: &mut ::protobuf::CodedOutputStream<'_>) -> ::protobuf::ProtobufResult<()> { 102 | for v in &self.changes { 103 | ::protobuf::rt::write_message_field_with_cached_size(1, v, os)?; 104 | }; 105 | os.write_unknown_fields(self.get_unknown_fields())?; 106 | ::std::result::Result::Ok(()) 107 | } 108 | 109 | fn get_cached_size(&self) -> u32 { 110 | self.cached_size.get() 111 | } 112 | 113 | fn get_unknown_fields(&self) -> &::protobuf::UnknownFields { 114 | &self.unknown_fields 115 | } 116 | 117 | fn mut_unknown_fields(&mut self) -> &mut ::protobuf::UnknownFields { 118 | &mut self.unknown_fields 119 | } 120 | 121 | fn new() -> ManifestChangeSet { 122 | ManifestChangeSet::new() 123 | } 124 | 125 | fn descriptor_static() -> ::protobuf::reflect::MessageDescriptor { 126 | ::protobuf::reflect::MessageDescriptor::new_generated_2(file_descriptor(), 0) 127 | } 128 | 129 | fn default_instance() -> &'static ManifestChangeSet { 130 | static instance: ManifestChangeSet = ManifestChangeSet { 131 | changes: ::std::vec::Vec::new(), 132 | unknown_fields: ::protobuf::UnknownFields::new(), 133 | cached_size: ::protobuf::rt::CachedSize::new(), 134 | }; 135 | &instance 136 | } 137 | } 138 | 139 | impl ::protobuf::Clear for ManifestChangeSet { 140 | fn clear(&mut self) { 141 | self.changes.clear(); 142 | self.unknown_fields.clear(); 143 | } 144 | } 145 | 146 | impl ::std::fmt::Debug for ManifestChangeSet { 147 | fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { 148 | ::protobuf::text_format::fmt(self, f) 149 | } 150 | } 151 | 152 | impl ::protobuf::reflect::ProtobufValue for ManifestChangeSet { 153 | type RuntimeType = ::protobuf::reflect::runtime_types::RuntimeTypeMessage; 154 | } 155 | 156 | #[derive(PartialEq,Clone,Default)] 157 | pub struct ManifestChange { 158 | // message fields 159 | pub id: u64, 160 | pub op: ::protobuf::ProtobufEnumOrUnknown, 161 | pub level: u32, 162 | // special fields 163 | pub unknown_fields: ::protobuf::UnknownFields, 164 | pub cached_size: ::protobuf::rt::CachedSize, 165 | } 166 | 167 | impl<'a> ::std::default::Default for &'a ManifestChange { 168 | fn default() -> &'a ManifestChange { 169 | ::default_instance() 170 | } 171 | } 172 | 173 | impl ManifestChange { 174 | pub fn new() -> ManifestChange { 175 | ::std::default::Default::default() 176 | } 177 | 178 | fn generated_message_descriptor_data() -> ::protobuf::reflect::GeneratedMessageDescriptorData { 179 | let mut fields = ::std::vec::Vec::new(); 180 | fields.push(::protobuf::reflect::rt::v2::make_simpler_field_accessor::<_, _>( 181 | "id", 182 | |m: &ManifestChange| { &m.id }, 183 | |m: &mut ManifestChange| { &mut m.id }, 184 | )); 185 | 
fields.push(::protobuf::reflect::rt::v2::make_simpler_field_accessor::<_, _>( 186 | "op", 187 | |m: &ManifestChange| { &m.op }, 188 | |m: &mut ManifestChange| { &mut m.op }, 189 | )); 190 | fields.push(::protobuf::reflect::rt::v2::make_simpler_field_accessor::<_, _>( 191 | "level", 192 | |m: &ManifestChange| { &m.level }, 193 | |m: &mut ManifestChange| { &mut m.level }, 194 | )); 195 | ::protobuf::reflect::GeneratedMessageDescriptorData::new_2::( 196 | "ManifestChange", 197 | 1, 198 | fields, 199 | ) 200 | } 201 | } 202 | 203 | impl ::protobuf::Message for ManifestChange { 204 | fn is_initialized(&self) -> bool { 205 | true 206 | } 207 | 208 | fn merge_from(&mut self, is: &mut ::protobuf::CodedInputStream<'_>) -> ::protobuf::ProtobufResult<()> { 209 | while !is.eof()? { 210 | let (field_number, wire_type) = is.read_tag_unpack()?; 211 | match field_number { 212 | 1 => { 213 | if wire_type != ::protobuf::wire_format::WireTypeVarint { 214 | return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); 215 | } 216 | self.id = is.read_uint64()?; 217 | }, 218 | 2 => { 219 | if wire_type != ::protobuf::wire_format::WireTypeVarint { 220 | return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); 221 | } 222 | self.op = is.read_enum_or_unknown()?; 223 | }, 224 | 3 => { 225 | if wire_type != ::protobuf::wire_format::WireTypeVarint { 226 | return ::std::result::Result::Err(::protobuf::rt::unexpected_wire_type(wire_type)); 227 | } 228 | self.level = is.read_uint32()?; 229 | }, 230 | _ => { 231 | ::protobuf::rt::read_unknown_or_skip_group(field_number, wire_type, is, self.mut_unknown_fields())?; 232 | }, 233 | }; 234 | } 235 | ::std::result::Result::Ok(()) 236 | } 237 | 238 | // Compute sizes of nested messages 239 | #[allow(unused_variables)] 240 | fn compute_size(&self) -> u32 { 241 | let mut my_size = 0; 242 | if self.id != 0 { 243 | my_size += ::protobuf::rt::value_size(1, self.id, ::protobuf::wire_format::WireTypeVarint); 244 | } 245 | if self.op != ::protobuf::ProtobufEnumOrUnknown::new(manifest_change::Operation::CREATE) { 246 | my_size += ::protobuf::rt::enum_or_unknown_size(2, self.op); 247 | } 248 | if self.level != 0 { 249 | my_size += ::protobuf::rt::value_size(3, self.level, ::protobuf::wire_format::WireTypeVarint); 250 | } 251 | my_size += ::protobuf::rt::unknown_fields_size(self.get_unknown_fields()); 252 | self.cached_size.set(my_size); 253 | my_size 254 | } 255 | 256 | fn write_to_with_cached_sizes(&self, os: &mut ::protobuf::CodedOutputStream<'_>) -> ::protobuf::ProtobufResult<()> { 257 | if self.id != 0 { 258 | os.write_uint64(1, self.id)?; 259 | } 260 | if self.op != ::protobuf::ProtobufEnumOrUnknown::new(manifest_change::Operation::CREATE) { 261 | os.write_enum(2, ::protobuf::ProtobufEnumOrUnknown::value(&self.op))?; 262 | } 263 | if self.level != 0 { 264 | os.write_uint32(3, self.level)?; 265 | } 266 | os.write_unknown_fields(self.get_unknown_fields())?; 267 | ::std::result::Result::Ok(()) 268 | } 269 | 270 | fn get_cached_size(&self) -> u32 { 271 | self.cached_size.get() 272 | } 273 | 274 | fn get_unknown_fields(&self) -> &::protobuf::UnknownFields { 275 | &self.unknown_fields 276 | } 277 | 278 | fn mut_unknown_fields(&mut self) -> &mut ::protobuf::UnknownFields { 279 | &mut self.unknown_fields 280 | } 281 | 282 | fn new() -> ManifestChange { 283 | ManifestChange::new() 284 | } 285 | 286 | fn descriptor_static() -> ::protobuf::reflect::MessageDescriptor { 287 | 
::protobuf::reflect::MessageDescriptor::new_generated_2(file_descriptor(), 1) 288 | } 289 | 290 | fn default_instance() -> &'static ManifestChange { 291 | static instance: ManifestChange = ManifestChange { 292 | id: 0, 293 | op: ::protobuf::ProtobufEnumOrUnknown::from_i32(0), 294 | level: 0, 295 | unknown_fields: ::protobuf::UnknownFields::new(), 296 | cached_size: ::protobuf::rt::CachedSize::new(), 297 | }; 298 | &instance 299 | } 300 | } 301 | 302 | impl ::protobuf::Clear for ManifestChange { 303 | fn clear(&mut self) { 304 | self.id = 0; 305 | self.op = ::protobuf::ProtobufEnumOrUnknown::new(manifest_change::Operation::CREATE); 306 | self.level = 0; 307 | self.unknown_fields.clear(); 308 | } 309 | } 310 | 311 | impl ::std::fmt::Debug for ManifestChange { 312 | fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { 313 | ::protobuf::text_format::fmt(self, f) 314 | } 315 | } 316 | 317 | impl ::protobuf::reflect::ProtobufValue for ManifestChange { 318 | type RuntimeType = ::protobuf::reflect::runtime_types::RuntimeTypeMessage; 319 | } 320 | 321 | /// Nested message and enums of message `ManifestChange` 322 | pub mod manifest_change { 323 | #[derive(Clone,Copy,PartialEq,Eq,Debug,Hash)] 324 | pub enum Operation { 325 | CREATE = 0, 326 | DELETE = 1, 327 | } 328 | 329 | impl ::protobuf::ProtobufEnum for Operation { 330 | fn value(&self) -> i32 { 331 | *self as i32 332 | } 333 | 334 | fn from_i32(value: i32) -> ::std::option::Option { 335 | match value { 336 | 0 => ::std::option::Option::Some(Operation::CREATE), 337 | 1 => ::std::option::Option::Some(Operation::DELETE), 338 | _ => ::std::option::Option::None 339 | } 340 | } 341 | 342 | fn values() -> &'static [Self] { 343 | static values: &'static [Operation] = &[ 344 | Operation::CREATE, 345 | Operation::DELETE, 346 | ]; 347 | values 348 | } 349 | 350 | fn enum_descriptor_static() -> ::protobuf::reflect::EnumDescriptor { 351 | ::protobuf::reflect::EnumDescriptor::new_generated_2(super::file_descriptor(), 0) 352 | } 353 | } 354 | 355 | impl ::std::default::Default for Operation { 356 | fn default() -> Self { 357 | Operation::CREATE 358 | } 359 | } 360 | 361 | impl ::protobuf::reflect::ProtobufValue for Operation { 362 | type RuntimeType = ::protobuf::reflect::runtime_types::RuntimeTypeEnum; 363 | } 364 | 365 | impl Operation { 366 | pub(in super) fn generated_enum_descriptor_data() -> ::protobuf::reflect::GeneratedEnumDescriptorData { 367 | ::protobuf::reflect::GeneratedEnumDescriptorData::new_2::("ManifestChange.Operation", 0) 368 | } 369 | } 370 | } 371 | 372 | static file_descriptor_proto_data: &'static [u8] = b"\ 373 | \n\x16src/pb/badgerpb3.proto\x12\tbadgerpb3\"H\n\x11ManifestChangeSet\ 374 | \x123\n\x07changes\x18\x01\x20\x03(\x0b2\x19.badgerpb3.ManifestChangeR\ 375 | \x07changes\"\x90\x01\n\x0eManifestChange\x12\x0e\n\x02id\x18\x01\x20\ 376 | \x01(\x04R\x02id\x123\n\x02op\x18\x02\x20\x01(\x0e2#.badgerpb3.ManifestC\ 377 | hange.OperationR\x02op\x12\x14\n\x05level\x18\x03\x20\x01(\rR\x05level\"\ 378 | #\n\tOperation\x12\n\n\x06CREATE\x10\0\x12\n\n\x06DELETE\x10\x01b\x06pro\ 379 | to3\ 380 | "; 381 | 382 | /// `FileDescriptorProto` object which was a source for this generated file 383 | pub fn file_descriptor_proto() -> &'static ::protobuf::descriptor::FileDescriptorProto { 384 | static file_descriptor_proto_lazy: ::protobuf::rt::LazyV2<::protobuf::descriptor::FileDescriptorProto> = ::protobuf::rt::LazyV2::INIT; 385 | file_descriptor_proto_lazy.get(|| { 386 | 
::protobuf::Message::parse_from_bytes(file_descriptor_proto_data).unwrap() 387 | }) 388 | } 389 | 390 | /// `FileDescriptor` object which allows dynamic access to files 391 | pub fn file_descriptor() -> ::protobuf::reflect::FileDescriptor { 392 | static file_descriptor_lazy: ::protobuf::rt::LazyV2<::protobuf::reflect::GeneratedFileDescriptor> = ::protobuf::rt::LazyV2::INIT; 393 | let file_descriptor = file_descriptor_lazy.get(|| { 394 | let mut deps = ::std::vec::Vec::new(); 395 | let mut messages = ::std::vec::Vec::new(); 396 | messages.push(ManifestChangeSet::generated_message_descriptor_data()); 397 | messages.push(ManifestChange::generated_message_descriptor_data()); 398 | let mut enums = ::std::vec::Vec::new(); 399 | enums.push(manifest_change::Operation::generated_enum_descriptor_data()); 400 | ::protobuf::reflect::GeneratedFileDescriptor::new_generated( 401 | file_descriptor_proto(), 402 | deps, 403 | messages, 404 | enums, 405 | ) 406 | }); 407 | ::protobuf::reflect::FileDescriptor::new_generated_2(file_descriptor) 408 | } 409 | -------------------------------------------------------------------------------- /src/pb/mod.rs: -------------------------------------------------------------------------------- 1 | // @generated 2 | 3 | use protobuf::Message; 4 | use crate::manifest::ManifestChangeBuilder; 5 | use crate::pb::badgerpb3::ManifestChangeSet; 6 | use crate::Result; 7 | // use quick_protobuf::MessageWrite; 8 | 9 | pub mod badgerpb3; 10 | pub mod backup; 11 | 12 | pub(crate) fn convert_manifest_set_to_vec(mf_set: &ManifestChangeSet) -> Vec { 13 | let mut buffer = vec![]; 14 | mf_set.write_to_vec(&mut buffer).unwrap(); 15 | buffer 16 | } 17 | 18 | pub(crate) fn parse_manifest_set_from_vec(buffer: &[u8]) -> Result { 19 | let set: ManifestChangeSet = protobuf::Message::parse_from_bytes(buffer).map_err(|err| crate::Error::from(format!("{}", err)))?; 20 | Ok(set) 21 | } 22 | 23 | #[test] 24 | fn enc_dec() { 25 | let mut mf = ManifestChangeSet::default(); 26 | mf.changes 27 | .extend(vec![ManifestChangeBuilder::new(1).build()]); 28 | let buffer = convert_manifest_set_to_vec(&mf); 29 | let got = parse_manifest_set_from_vec(&buffer).unwrap(); 30 | assert_eq!(got, mf); 31 | } 32 | -------------------------------------------------------------------------------- /src/skl/alloc.rs: -------------------------------------------------------------------------------- 1 | use crate::{cals_size_with_align}; 2 | use std::fmt::Debug; 3 | use std::mem::{ManuallyDrop}; 4 | use std::sync::atomic::{AtomicUsize, Ordering}; 5 | 6 | pub(crate) const PtrAlign: usize = 7; 7 | 8 | pub trait Allocate: Send + Sync { 9 | #[inline] 10 | fn alloc(&self, size: usize) -> usize; 11 | #[inline] 12 | fn alloc_rev(&self, size: usize) -> usize { 13 | todo!() 14 | } 15 | #[inline] 16 | fn size(&self) -> usize; 17 | #[inline] 18 | unsafe fn get_mut(&self, offset: usize) -> *mut T; 19 | #[inline] 20 | fn offset(&self, ptr: *const T) -> usize; 21 | #[inline] 22 | fn len(&self) -> usize; 23 | #[inline] 24 | fn cap(&self) -> usize; 25 | } 26 | 27 | #[derive(Debug)] 28 | pub struct DoubleAlloc { 29 | pub(crate) head: AtomicUsize, 30 | pub(crate) tail: AtomicUsize, 31 | ptr: ManuallyDrop>, 32 | _cap: usize, 33 | } 34 | 35 | unsafe impl Send for DoubleAlloc {} 36 | 37 | impl Drop for DoubleAlloc { 38 | fn drop(&mut self) { 39 | unsafe { 40 | ManuallyDrop::drop(&mut self.ptr); 41 | } 42 | } 43 | } 44 | 45 | impl Allocate for DoubleAlloc { 46 | fn alloc(&self, size: usize) -> usize { 47 | let free_count = self.free_count(); 48 | // 
info!("{}", free_count); 49 | assert!(free_count > size, "less memory"); 50 | let offset = self.head.fetch_add(size, Ordering::SeqCst); 51 | offset 52 | } 53 | 54 | fn alloc_rev(&self, size: usize) -> usize { 55 | let free_count = self.free_count(); 56 | assert!(free_count > size, "less memory"); 57 | let offset = self.tail.fetch_sub(size, Ordering::SeqCst); 58 | offset - size 59 | } 60 | 61 | fn size(&self) -> usize { 62 | todo!() 63 | } 64 | 65 | unsafe fn get_mut(&self, offset: usize) -> *mut T { 66 | let ptr = self.ptr.as_ptr() as *mut u8; 67 | ptr.add(offset).cast::() 68 | } 69 | 70 | fn offset(&self, ptr: *const T) -> usize { 71 | let base_ptr = self.ptr.as_ptr() as usize; 72 | let offset_ptr = ptr as usize; 73 | offset_ptr - base_ptr 74 | } 75 | 76 | fn len(&self) -> usize { 77 | self.cap() - (self.tail.load(Ordering::SeqCst) - self.head.load(Ordering::SeqCst)) 78 | } 79 | 80 | fn cap(&self) -> usize { 81 | self._cap 82 | } 83 | } 84 | 85 | impl DoubleAlloc { 86 | pub(crate) fn new(n: usize) -> DoubleAlloc { 87 | let n = cals_size_with_align(n, PtrAlign); 88 | assert_eq!(n % (PtrAlign + 1), 0); 89 | DoubleAlloc { 90 | head: AtomicUsize::new(PtrAlign + 1), 91 | tail: AtomicUsize::new(n), 92 | ptr: ManuallyDrop::new(vec![0u8; n]), 93 | _cap: n, 94 | } 95 | } 96 | 97 | fn free_count(&self) -> usize { 98 | let head = self.head.load(Ordering::SeqCst); 99 | let tail = self.tail.load(Ordering::SeqCst); 100 | assert!(head < tail, "head({}) should be lt tail({})", head, tail); 101 | tail - head 102 | } 103 | } 104 | 105 | #[test] 106 | fn t() { 107 | let size = (1 + 0) & !0; 108 | println!("{}, {}, {}", (1 + 0) & !0, (0 + 0) & !0, (3 + 0) & !0); 109 | } 110 | -------------------------------------------------------------------------------- /src/skl/arena.rs: -------------------------------------------------------------------------------- 1 | use crate::skl::node::Node; 2 | use crate::skl::PtrAlign; 3 | use crate::y::ValueStruct; 4 | use crate::{Allocate, DoubleAlloc}; 5 | use std::mem::size_of; 6 | use std::ptr::{slice_from_raw_parts, slice_from_raw_parts_mut, NonNull}; 7 | use tracing::info; 8 | 9 | /// How to cals SkipList allocate size 10 | /// 8(zero-bit) + key + value + node*N 11 | 12 | /// `Arena` should be lock-free. 13 | #[derive(Debug)] 14 | pub struct Arena { 15 | alloc: DoubleAlloc, 16 | } 17 | 18 | impl Arena { 19 | pub(crate) fn new(n: usize) -> Self { 20 | assert!(n > 0); 21 | // Don't store data at position 0 in order to reverse offset = 0 as a kind 22 | // of nil pointer 23 | Self { 24 | alloc: DoubleAlloc::new(n + 1), 25 | } 26 | } 27 | 28 | pub(crate) fn size(&self) -> u32 { 29 | self.alloc.len() as u32 30 | } 31 | 32 | pub(crate) fn cap(&self) -> u32 { 33 | self.alloc.cap() as u32 34 | } 35 | 36 | pub(crate) fn free_size(&self) -> u32 { 37 | self.cap() - self.size() 38 | } 39 | 40 | pub(crate) fn valid(&self) -> bool { 41 | // !self.slice.ptr.is_empty() 42 | todo!() 43 | } 44 | 45 | // Returns a pointer to the node located at offset. If the offset is 46 | // zero, then the null node pointer is returned. 
47 | pub(crate) fn get_node(&self, offset: usize) -> Option<&Node> { 48 | if offset == 0 { 49 | return None; 50 | } 51 | unsafe { self.alloc.get_mut::(offset).as_ref() } 52 | } 53 | 54 | pub(crate) fn get_mut_node(&self, offset: usize) -> Option<&mut Node> { 55 | if offset == 0 { 56 | return None; 57 | } 58 | unsafe { self.alloc.get_mut::(offset).as_mut() } 59 | } 60 | 61 | // Returns start location 62 | pub(crate) fn put_key(&self, key: &[u8]) -> u32 { 63 | let offset = self.alloc.alloc_rev(key.len()); 64 | let buffer = unsafe { self.alloc.get_mut::(offset) }; 65 | let buffer = unsafe { &mut *slice_from_raw_parts_mut(buffer, key.len()) }; 66 | buffer.copy_from_slice(key); 67 | offset as u32 68 | } 69 | 70 | // Put will *copy* val into arena. To make better use of this, reuse your input 71 | // val buffer. Returns an offset into buf. User is responsible for remembering 72 | // size of val. We could also store this size inside arena but the encoding and 73 | // decoding will incur some overhead. 74 | pub(crate) fn put_val(&self, v: &ValueStruct) -> (u32, u16) { 75 | let buf: Vec = v.into(); 76 | let offset = self.put_key(buf.as_slice()); 77 | (offset, buf.len() as u16) 78 | } 79 | 80 | // Returns byte slice at offset. 81 | pub(crate) fn get_key(&self, offset: u32, size: u16) -> &[u8] { 82 | let buffer = unsafe { self.alloc.get_mut::(offset as usize) }; 83 | unsafe { &*slice_from_raw_parts(buffer, size as usize) } 84 | } 85 | 86 | // Returns byte slice at offset. The given size should be just the value 87 | // size and should NOT include the meta bytes. 88 | pub(crate) fn get_val(&self, offset: u32, size: u16) -> ValueStruct { 89 | let buffer = self.get_key(offset, size); 90 | ValueStruct::from(buffer) 91 | } 92 | 93 | // Return byte slice at offset. 94 | // FIXME: 95 | pub(crate) fn put_node(&self, _height: isize) -> u32 { 96 | let offset = self.alloc.alloc(Node::align_size()); 97 | offset as u32 98 | } 99 | 100 | // Returns the offset of `node` in the arena. If the `node` pointer is 101 | // nil, then the zero offset is returned. 
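    // Note: this is the inverse of get_node/get_mut_node. A node's tower links
    // store these u32 arena offsets rather than raw pointers (see Node::tower
    // and cas_next_offset), so get_node(get_node_offset(n)) must round-trip,
    // with 0 again acting as the null link.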
102 | pub(crate) fn get_node_offset(&self, node: *const Node) -> usize { 103 | if node.is_null() { 104 | return 0; 105 | } 106 | let offset = self.alloc.offset(node); 107 | offset 108 | } 109 | 110 | pub(crate) fn copy(&self) -> NonNull { 111 | let ptr = self as *const Self as *mut Self; 112 | NonNull::new(ptr).unwrap() 113 | } 114 | } 115 | 116 | #[cfg(test)] 117 | mod tests { 118 | use crate::skl::{PtrAlign, MAX_HEIGHT}; 119 | use crate::test_util::tracing_log; 120 | use crate::{cals_size_with_align, Arena, Node, SkipList, ValueStruct}; 121 | use log::info; 122 | use log::kv::{Key, value}; 123 | use prometheus::core::AtomicU64; 124 | use rand::{random, thread_rng, Rng}; 125 | use std::ptr; 126 | use std::sync::atomic::Ordering; 127 | use std::sync::Arc; 128 | use std::thread::spawn; 129 | use std::time::Duration; 130 | 131 | #[test] 132 | fn t_arena_key() { 133 | let arena = Arena::new(1 << 20); 134 | let keys = vec![vec![1, 2, 3], vec![4, 5, 6, 7, 90]]; 135 | let mut got = vec![]; 136 | for key in keys.iter() { 137 | got.push(arena.put_key(key)); 138 | } 139 | for (i, offset) in got.iter().enumerate() { 140 | let key = arena.get_key(*offset, keys[i].len() as u16); 141 | assert_eq!(key, keys[i]); 142 | } 143 | } 144 | 145 | #[test] 146 | fn t_arena_value() { 147 | let arena = Arena::new(1 << 20); 148 | let v = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 149 | let value = ValueStruct { 150 | meta: 1, 151 | user_meta: 1, 152 | cas_counter: 2, 153 | value: v, 154 | }; 155 | let (start, n) = arena.put_val(&value); 156 | let load_value = arena.get_val(start, n); 157 | assert_eq!(value, load_value); 158 | } 159 | 160 | #[test] 161 | fn t_arena_store_node() { 162 | let arena = Arena::new(1 << 20); 163 | let mut starts = vec![]; 164 | for i in 0..5 { 165 | let start = arena.put_node(i); 166 | let node = arena.get_mut_node(start as usize).unwrap(); 167 | node.height = i as u16; 168 | // node.value.fetch_add(i as u64, atomic::Ordering::Relaxed); 169 | starts.push((i, start)); 170 | } 171 | 172 | for (i, start) in starts { 173 | let node = arena.get_mut_node(start as usize).unwrap(); 174 | // let value = node.value.load(atomic::Ordering::Relaxed); 175 | assert_eq!(node.height, i as u16); 176 | // assert_eq!(value, i as u64); 177 | } 178 | 179 | let second_node = arena.get_node(Node::size()).unwrap(); 180 | let offset = arena.get_node_offset(second_node); 181 | assert_eq!(offset, Node::size()); 182 | } 183 | 184 | #[test] 185 | fn t_arena_currency() { 186 | let arena = Arc::new(Arena::new(1 << 20)); 187 | let mut waits = vec![]; 188 | for _i in 0..100 { 189 | let arena = Arc::clone(&arena); 190 | waits.push(spawn(move || arena.put_key(b"abc"))); 191 | } 192 | 193 | let mut offsets = waits 194 | .into_iter() 195 | .map(|join| join.join().unwrap()) 196 | .collect::>(); 197 | offsets.sort(); 198 | println!("offsets: {:?}", offsets); 199 | } 200 | 201 | #[test] 202 | fn t_arena_memory1() { 203 | let arena = Arena::new(1 << 20); 204 | struct Item<'a> { 205 | key: Vec, 206 | key_offset: usize, 207 | value: ValueStruct, 208 | value_offset: usize, 209 | node: &'a Node, 210 | node_offset: usize, 211 | } 212 | let mut kv = vec![]; 213 | for i in 0..1119000 { 214 | let key = vec![1u8; random::() % 18]; 215 | let value = vec![1u8; random::() % 10]; 216 | let value = ValueStruct::new(value, 9, 0, 0); 217 | if arena.cap() - 200 < arena.size() { 218 | break; 219 | } 220 | let key_offset = arena.put_key(&key); 221 | if arena.cap() - 200 < arena.size() { 222 | break; 223 | } 224 | let (value_offset, _) = 
arena.put_val(&value); 225 | if arena.cap() - 200 < arena.size() { 226 | break; 227 | } 228 | let offset = arena.put_node(0); 229 | let node = arena.get_mut_node(offset as usize).unwrap(); 230 | node.height = 12; 231 | node.key_offset = key_offset; 232 | node.key_size = key.len() as u16; 233 | // node.value.store(10, Ordering::SeqCst); 234 | for i in 0..node.tower.len() { 235 | node.tower[i].store(20, Ordering::SeqCst); 236 | } 237 | //println!("{}, {}, {}, {:?}", key_offset, value_offset, offset, node.tower); 238 | kv.push(Item { 239 | key: b"".to_vec(), 240 | key_offset: 0, 241 | value: ValueStruct::default(), 242 | value_offset: 0, 243 | node, 244 | node_offset: offset as usize, 245 | }) 246 | } 247 | // 248 | for el in kv.into_iter().enumerate() { 249 | let node = arena.get_node(el.1.node_offset).unwrap(); 250 | //println!("{}, {:?}", el.0, node); 251 | } 252 | } 253 | 254 | #[test] 255 | fn t_arena_memory_cals() { 256 | tracing_log(); 257 | let st = SkipList::new(1 << 9); 258 | let mut rng = thread_rng(); 259 | for i in 0..1000000 { 260 | let mut key = vec![1u8; random::() % 100]; 261 | rng.fill(&mut key[..]); 262 | let value = vec![1u8; random::() % 10]; 263 | let value = ValueStruct::new(value, 9, 0, 0); 264 | if st.arena.free_size() <= 2 * (key.len() + value.size() + Node::size()) as u32 { 265 | info!("skip it"); 266 | return; 267 | } 268 | st.put(&key, value.clone()); 269 | // info!( 270 | // " key_size: {}, value_size: {}, node_size: {}, cap:{}, len:{}, free:{}, head:{}, tail:{}", 271 | // key.len(), 272 | // value.size(), 273 | // Node::size(), 274 | // st.arena.cap(), 275 | // st.arena.size(), 276 | // st.arena.free_size(), 277 | // st.arena.alloc.head.load(Ordering::SeqCst), 278 | // st.arena.alloc.tail.load(Ordering::SeqCst), 279 | // ); 280 | // tokio::time::sleep(Duration::from_millis(200)).await; 281 | } 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /src/skl/cursor.rs: -------------------------------------------------------------------------------- 1 | use crate::skl::{node::Node, skip::SkipList}; 2 | use crate::y::iterator::{KeyValue, Xiterator}; 3 | use crate::y::ValueStruct; 4 | use std::cell::RefCell; 5 | 6 | /// An iterator over `SkipList` object. For new objects, you just 7 | /// need to initialize Iterator.List. 8 | pub struct Cursor<'a> { 9 | pub(crate) list: &'a SkipList, 10 | item: RefCell>, 11 | id: String, 12 | } 13 | 14 | impl<'a> Cursor<'a> { 15 | pub fn new(list: &'a SkipList) -> Cursor<'a> { 16 | Cursor { 17 | list, 18 | item: RefCell::new(Some(list.get_head())), 19 | id: format!("cursor"), 20 | } 21 | } 22 | 23 | /// Returns true if the iterator is positioned at a valid node. 24 | pub fn valid(&self) -> bool { 25 | self.item 26 | .borrow() 27 | .map_or(false, |node| !std::ptr::eq(node, self.list.get_head())) 28 | } 29 | 30 | /// Returns the key at the current position. 31 | pub fn key(&self) -> &[u8] { 32 | let node = self.item.borrow().unwrap(); 33 | self.list 34 | .arena_ref() 35 | .get_key(node.key_offset, node.key_size) 36 | } 37 | 38 | /// Return value. 39 | pub fn value(&self) -> ValueStruct { 40 | let node = self.item.borrow().unwrap(); 41 | let (value_offset, val_size) = node.get_value_offset(); 42 | self.list.arena_ref().get_val(value_offset, val_size) 43 | } 44 | 45 | /// Advances to the next position. 
46 | pub fn next(&'a self) -> Option<&Node> { 47 | //assert!(self.valid()); 48 | let next = self.list.get_next(self.item.borrow().unwrap(), 0); 49 | *self.item.borrow_mut() = next; 50 | next 51 | } 52 | 53 | /// Advances to the previous position. 54 | pub fn prev(&'a self) -> Option<&Node> { 55 | //assert!(self.valid()); 56 | let (node, _) = self.list.find_near(self.key(), true, false); 57 | *self.item.borrow_mut() = node; 58 | node 59 | } 60 | 61 | /// Advance to the first entry with a key >= target. 62 | pub fn seek(&'a self, target: &[u8]) -> Option<&Node> { 63 | let (node, _) = self.list.find_near(target, false, true); // find >=. 64 | *self.item.borrow_mut() = node; 65 | node 66 | } 67 | 68 | /// Finds an entry with key <= target. 69 | pub fn seek_for_prev(&'a self, target: &[u8]) -> Option<&Node> { 70 | let (node, _) = self.list.find_near(target, true, true); // find <=. 71 | *self.item.borrow_mut() = node; 72 | node 73 | } 74 | 75 | /// Seeks position at the first entry in list. 76 | /// Final state of iterator is Valid() if list is not empty. 77 | pub fn seek_for_first(&'a self) -> Option<&'a Node> { 78 | let node = self.list.get_next(self.list.get_head(), 0); 79 | *self.item.borrow_mut() = node; 80 | node 81 | } 82 | 83 | /// Seeks position at the last entry in list. 84 | /// Final state of iterator is Valid() iff list is not empty. 85 | pub fn seek_for_last(&'a self) -> Option<&Node> { 86 | let node = unsafe { self.list.find_last() }; 87 | *self.item.borrow_mut() = node; 88 | node 89 | } 90 | 91 | // Must be call for every `Cursor` 92 | pub fn close(&self) { 93 | self.list.decr_ref(); 94 | } 95 | 96 | fn _peek(&self) -> Option<&'a Node> { 97 | let node = self.item.borrow(); 98 | if node.is_none() { 99 | return None; 100 | } 101 | let node = node.unwrap(); 102 | if std::ptr::eq(node, self.list.get_head()) { 103 | return None; 104 | } 105 | Some(node) 106 | } 107 | } 108 | 109 | pub struct CursorReverse<'a> { 110 | iter: &'a Cursor<'a>, 111 | reversed: RefCell, 112 | } 113 | 114 | impl<'a> Xiterator for CursorReverse<'a> { 115 | type Output = &'a Node; 116 | fn next(&self) -> Option { 117 | if !*(self.reversed.borrow()) { 118 | self.iter.next() 119 | } else { 120 | self.iter.prev() 121 | } 122 | } 123 | 124 | fn rewind(&self) -> Option { 125 | if !*(self.reversed.borrow()) { 126 | self.iter.seek_for_first() 127 | } else { 128 | self.iter.seek_for_last() 129 | } 130 | } 131 | 132 | fn seek(&self, key: &[u8]) -> Option { 133 | if !*(self.reversed.borrow()) { 134 | self.iter.seek(key) 135 | } else { 136 | self.iter.seek_for_prev(key) 137 | } 138 | } 139 | 140 | fn peek(&self) -> Option { 141 | self.iter._peek() 142 | } 143 | 144 | fn id(&self) -> String { 145 | self.iter.id.clone() 146 | } 147 | } 148 | 149 | impl KeyValue for CursorReverse<'_> { 150 | fn key(&self) -> &[u8] { 151 | self.iter.key() 152 | } 153 | 154 | fn value(&self) -> ValueStruct { 155 | self.iter.value() 156 | } 157 | } 158 | 159 | #[test] 160 | fn t_cursor() {} 161 | -------------------------------------------------------------------------------- /src/skl/mod.rs: -------------------------------------------------------------------------------- 1 | mod alloc; 2 | mod arena; 3 | mod cursor; 4 | mod node; 5 | mod skip; 6 | 7 | pub use alloc::*; 8 | pub use arena::Arena; 9 | pub use cursor::Cursor; 10 | pub use node::Node; 11 | pub use skip::*; 12 | 13 | const MAX_HEIGHT: usize = 20; 14 | const HEIGHT_INCREASE: u32 = u32::MAX / 3; 15 | -------------------------------------------------------------------------------- 
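The cursor above is the main read path over a SkipList. The sketch below is a minimal, illustrative usage example added here for clarity; it is not code from the repository, and it assumes crate-internal visibility of `SkipList::put` and `ValueStruct::new` as used in the arena tests.

use crate::skl::Cursor;
use crate::{SkipList, ValueStruct};

fn scan_all_sketch() {
    let list = SkipList::new(1 << 20);
    list.put(b"key-1", ValueStruct::new(b"val-1".to_vec(), 0, 0, 0));
    list.put(b"key-2", ValueStruct::new(b"val-2".to_vec(), 0, 0, 0));

    let cursor = Cursor::new(&list);
    // Position at the smallest key, then walk forward until next() returns
    // None (valid() then reports false).
    let _ = cursor.seek_for_first();
    while cursor.valid() {
        println!(
            "{} => {}",
            String::from_utf8_lossy(cursor.key()),
            String::from_utf8_lossy(&cursor.value().value)
        );
        let _ = cursor.next();
    }
    // Every Cursor must be closed so the list's reference count is released.
    cursor.close();
}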
/src/skl/node.rs: -------------------------------------------------------------------------------- 1 | use crate::skl::arena::Arena; 2 | use crate::skl::{MAX_HEIGHT, PtrAlign}; 3 | use crate::y::ValueStruct; 4 | use std::mem::{align_of, size_of, size_of_val}; 5 | use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; 6 | 7 | #[derive(Debug)] 8 | #[repr(C)] 9 | pub struct Node { 10 | // A byte slice is 24 bytes. We are trying to save space here. 11 | // immutable. No need to lock to access key. 12 | pub(crate) key_offset: u32, 13 | // immutable. No need to lock to access key. 14 | pub(crate) key_size: u16, 15 | 16 | // Height of the tower. 17 | pub(crate) height: u16, 18 | 19 | // parts of the value are encoded as a single uint64 so that it 20 | // can be atomically loaded and stored: 21 | // value offset: uint32 (bits 0-31) 22 | // value size : uint16 (bits 32-47) 23 | pub(crate) value: AtomicU64, 24 | 25 | // Most nodes do not need to use the full height of the tower, since the 26 | // probability of each successive level decreases exponentially, Because 27 | // these elements are never accessed, the do not need to be allocated. 28 | // is deliberately truncated to not include unneeded tower elements. 29 | // 30 | // All accesses to elements should use CAS operations, with no need to lock. 31 | pub(crate) tower: [AtomicU32; MAX_HEIGHT], 32 | } 33 | 34 | impl Default for Node { 35 | fn default() -> Self { 36 | const TOWER: AtomicU32 = AtomicU32::new(0); 37 | let mut node = Node { 38 | key_offset: 0, 39 | key_size: 0, 40 | height: 0, 41 | value: AtomicU64::new(0), 42 | tower: [TOWER; MAX_HEIGHT], 43 | }; 44 | for i in 0..MAX_HEIGHT { 45 | node.tower[i] = AtomicU32::new(0); 46 | } 47 | node 48 | } 49 | } 50 | 51 | impl Node { 52 | pub(crate) fn new<'a>( 53 | arena: &'a mut Arena, 54 | key: &'a [u8], 55 | v: &'a ValueStruct, 56 | height: isize, 57 | ) -> &'a mut Node { 58 | let key_offset = arena.put_key(key); 59 | let (value_offset, value_size) = arena.put_val(v); 60 | // The base level is already allocated in the node struct. 
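        // Ordering mirrors the DoubleAlloc layout: the key and the serialized
        // value above were copied to the tail of the arena (put_key/put_val use
        // alloc_rev), while put_node below takes Node::align_size() bytes from
        // the head, keeping node offsets 8-byte aligned.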
61 | let offset = arena.put_node(height); 62 | let node = arena.get_mut_node(offset as usize).unwrap(); 63 | // 1: storage key 64 | node.key_offset = key_offset; 65 | node.key_size = key.len() as u16; 66 | // 2: storage value 67 | node.value.store( 68 | Self::encode_value(value_offset, value_size), 69 | Ordering::Relaxed, 70 | ); 71 | node.height = height as u16; 72 | node 73 | } 74 | 75 | pub(crate) const fn size() -> usize { 76 | size_of::() 77 | } 78 | 79 | pub(crate) const fn align_size() -> usize { 80 | (size_of::() + PtrAlign) & !PtrAlign 81 | } 82 | 83 | pub(crate) fn set_value(&self, arena: &Arena, v: &ValueStruct) { 84 | let (value_offset, value_size) = arena.put_val(v); 85 | let value = Self::encode_value(value_offset, value_size as u16); 86 | self.value.store(value, Ordering::Relaxed); 87 | } 88 | 89 | pub(crate) fn get_value_offset(&self) -> (u32, u16) { 90 | let value = self.value.load(Ordering::Acquire); 91 | Self::decode_value(value) 92 | } 93 | 94 | pub(crate) fn key<'a>(&'a self, arena: &'a Arena) -> &'a [u8] { 95 | arena.get_key(self.key_offset, self.key_size) 96 | } 97 | 98 | pub(crate) fn get_next_offset(&self, h: usize) -> u32 { 99 | self.tower[h].load(Ordering::Acquire) 100 | } 101 | 102 | pub(crate) fn cas_next_offset(&self, h: usize, old: u32, val: u32) -> bool { 103 | let ok = self.tower[h].compare_exchange(old, val, Ordering::Acquire, Ordering::Acquire); 104 | return ok.is_ok(); 105 | } 106 | 107 | #[inline] 108 | fn decode_value(value: u64) -> (u32, u16) { 109 | let value_offset = value as u32; 110 | let value_size = (value >> 32) as u16; 111 | (value_offset, value_size) 112 | } 113 | 114 | #[inline] 115 | fn encode_value(value_offset: u32, value_size: u16) -> u64 { 116 | ((value_size as u64) << 32) | (value_offset) as u64 117 | } 118 | } 119 | 120 | -------------------------------------------------------------------------------- /src/st_manager.rs: -------------------------------------------------------------------------------- 1 | use crate::options::Options; 2 | use crate::SkipList; 3 | 4 | use crossbeam_epoch::Shared; 5 | use drop_cell::defer; 6 | use log::info; 7 | use parking_lot::lock_api::RwLockWriteGuard; 8 | use parking_lot::RawRwLock; 9 | 10 | use std::sync::atomic::{AtomicUsize, Ordering}; 11 | use std::sync::Arc; 12 | 13 | type SkipListItem = crossbeam_epoch::Atomic; 14 | 15 | pub struct SkipListManager { 16 | // TODO use it lock skip_list_manager 17 | share_lock: parking_lot::RwLock<()>, 18 | mt: Option, 19 | imm: Arc>>, 20 | sz: Arc, 21 | mt_seq: Arc, 22 | } 23 | 24 | impl Default for SkipListManager { 25 | fn default() -> Self { 26 | SkipListManager { 27 | share_lock: parking_lot::RwLock::new(()), 28 | mt: None, 29 | imm: Arc::new(parking_lot::RwLock::new(vec![])), 30 | sz: Arc::new(AtomicUsize::new(0)), 31 | mt_seq: Arc::new(AtomicUsize::default()), 32 | } 33 | } 34 | } 35 | 36 | impl SkipListManager { 37 | pub fn new(sz: usize) -> SkipListManager { 38 | SkipListManager { 39 | share_lock: parking_lot::RwLock::new(()), 40 | mt: Some(SkipListItem::new(SkipList::new(sz))), 41 | imm: Arc::new(parking_lot::RwLock::new(vec![])), 42 | sz: Arc::new(AtomicUsize::new(sz)), 43 | mt_seq: Arc::new(AtomicUsize::new(0)), 44 | } 45 | } 46 | 47 | pub fn take<'a>(&'a self, p: &'a crossbeam_epoch::Guard) -> Shared<'a, SkipList> { 48 | // self.lock_exclusive(); 49 | // defer! 
{self.unlock_exclusive()} 50 | self.mt.as_ref().unwrap().load_consume(p) 51 | } 52 | 53 | pub fn mt_ref<'a>(&'a self, p: &'a crossbeam_epoch::Guard) -> Shared<'a, SkipList> { 54 | // self.lock_exclusive(); 55 | // defer! {self.unlock_exclusive()} 56 | let st = self.mt.as_ref().unwrap().load(Ordering::Relaxed, &p); 57 | st 58 | } 59 | 60 | pub fn mt_clone(&self) -> SkipList { 61 | // self.lock_exclusive(); 62 | // defer! {self.unlock_exclusive()} 63 | let p = crossbeam_epoch::pin(); 64 | let mt = self.mt_ref(&p); 65 | unsafe { mt.as_ref().unwrap().clone() } 66 | } 67 | 68 | pub fn imm(&self) -> RwLockWriteGuard<'_, RawRwLock, Vec> { 69 | // self.lock_exclusive(); 70 | // defer! {self.unlock_exclusive()} 71 | self.imm.write() 72 | } 73 | 74 | // TODO 75 | pub fn swap_st(&self, opt: Options) { 76 | self.lock_exclusive(); 77 | defer! {self.unlock_exclusive()} 78 | let p = crossbeam_epoch::pin(); 79 | let st = self.take(&p).into(); 80 | self.imm.write().push(st); 81 | let st = SkipList::new(opt.arena_size() as usize); 82 | self.mt 83 | .as_ref() 84 | .unwrap() 85 | .store(crossbeam_epoch::Owned::new(st), Ordering::Relaxed); 86 | self.mt_seq.fetch_add(1, Ordering::Relaxed); 87 | } 88 | 89 | pub fn advance_imm(&self, _mt: &SkipList) { 90 | self.lock_exclusive(); 91 | defer! {self.unlock_exclusive()}; 92 | info!( 93 | "advance im, mt_seq: {}", 94 | self.mt_seq.load(Ordering::Relaxed) 95 | ); 96 | let mut imm = self.imm(); 97 | // let first_imm = imm 98 | // .first() 99 | // .unwrap() 100 | // .load(Ordering::Relaxed, &crossbeam_epoch::pin()) 101 | // .as_raw(); 102 | // assert!(ptr::eq(first_imm, mt)); 103 | imm.remove(0); 104 | } 105 | 106 | pub fn lock_exclusive(&self) { 107 | use parking_lot::lock_api::RawRwLock; 108 | unsafe { self.share_lock.raw().lock_exclusive() } 109 | } 110 | 111 | pub fn unlock_exclusive(&self) { 112 | use parking_lot::lock_api::RawRwLock; 113 | unsafe { self.share_lock.raw().unlock_exclusive() } 114 | } 115 | } 116 | 117 | #[test] 118 | fn ti() {} 119 | -------------------------------------------------------------------------------- /src/table/builder.rs: -------------------------------------------------------------------------------- 1 | use crate::y::{hash, is_eof, Decode, Encode, ValueStruct}; 2 | use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; 3 | use drop_cell::defer; 4 | use growable_bloom_filter::GrowableBloom; 5 | use log::{debug, info}; 6 | use serde_json; 7 | use std::hash::Hasher; 8 | use std::io::{Cursor, Read, Write}; 9 | use std::time::SystemTime; 10 | 11 | // TODO use simd 12 | #[derive(Clone, Default, Debug)] 13 | pub(crate) struct Header { 14 | pub(crate) p_len: u16, // Overlap with base key(Prefix length) 15 | pub(crate) k_len: u16, // Length of the diff. Eg: "d" = "abcd" - "abc" 16 | pub(crate) v_len: u16, // Length of the value. 17 | pub(crate) prev: u32, // Offset for the previous key-value pair. The offset is relative to `block` base offset. 
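    // The four fields are encoded in this order by Encode::enc below: three
    // big-endian u16 values followed by a big-endian u32, i.e. the 10 bytes
    // returned by Header::size(). Each block entry is then laid out as
    // header (10 B) | diff_key (k_len B) | value (v_len B, a serialized ValueStruct).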
18 | } 19 | 20 | impl Header { 21 | pub(crate) const fn size() -> usize { 22 | 10 23 | } 24 | 25 | pub(crate) fn is_dummy(&self) -> bool { 26 | self.k_len == 0 && self.p_len == 0 27 | } 28 | } 29 | 30 | impl Decode for Header { 31 | fn dec(&mut self, rd: &mut dyn Read) -> crate::Result<()> { 32 | self.p_len = rd.read_u16::()?; 33 | self.k_len = rd.read_u16::()?; 34 | self.v_len = rd.read_u16::()?; 35 | self.prev = rd.read_u32::()?; 36 | Ok(()) 37 | } 38 | } 39 | 40 | impl Encode for Header { 41 | fn enc(&self, wt: &mut dyn Write) -> crate::Result { 42 | wt.write_u16::(self.p_len)?; 43 | wt.write_u16::(self.k_len)?; 44 | wt.write_u16::(self.v_len)?; 45 | wt.write_u32::(self.prev)?; 46 | Ok(Header::size()) 47 | } 48 | } 49 | 50 | impl From<&[u8]> for Header { 51 | fn from(buffer: &[u8]) -> Self { 52 | let mut header = Header::default(); 53 | Decode::dec(&mut header, &mut Cursor::new(buffer)).unwrap(); 54 | header 55 | } 56 | } 57 | 58 | impl Into> for Header { 59 | fn into(self) -> Vec { 60 | let mut wt = Cursor::new(vec![0u8; Header::size()]); 61 | Encode::enc(&self, &mut wt).unwrap(); 62 | wt.into_inner() 63 | } 64 | } 65 | 66 | // Used in building a table. 67 | pub struct Builder { 68 | counter: usize, // Number of keys written for the current block. 69 | buf: Cursor>, // bytes buffer 70 | base_key: Vec, // Base key for the current block. 71 | base_offset: u32, // Offset for the current block. 72 | restarts: Vec, // Base offsets of every block. 73 | prev_offset: u32, // Tracks offset for the previous key-value-pair. Offset is relative to block base offset. 74 | key_buf: Cursor>, 75 | key_count: u32, 76 | } 77 | 78 | impl Builder { 79 | // the max keys number of every block. 80 | pub(crate) const RESTART_INTERVAL: usize = 100; 81 | pub(crate) fn is_zero_bytes(&self) -> bool { 82 | self.buf.position() == 0 83 | } 84 | 85 | /// Returns a suffix of new_key that is different from b.base_key. 86 | fn key_diff<'a>(new_key: &'a [u8], key: &'a [u8]) -> &'a [u8] { 87 | let mut i = 0usize; 88 | while i < new_key.len() && i < key.len() { 89 | if new_key[i] != key[i] { 90 | break; 91 | } 92 | i += 1; 93 | } 94 | &new_key[i..] 95 | } 96 | 97 | fn add_helper(&mut self, key: &[u8], v: &ValueStruct) { 98 | // Add key to bloom filter. 99 | self.key_buf 100 | .write_u16::(key.len() as u16) 101 | .unwrap(); 102 | self.key_buf.write_all(key).unwrap(); 103 | self.key_count += 1; 104 | 105 | // diff_key stores the difference of key with base_key. 106 | let diff_key; 107 | if self.base_key.is_empty() { 108 | // Make a copy. Builder should not keep references. Otherwise, caller has to be very careful 109 | // and will have to make copies of keys every time they add to builder. which is even worse. 110 | self.base_key.clear(); 111 | self.base_key.extend_from_slice(key); 112 | diff_key = key; 113 | } else { 114 | diff_key = Self::key_diff(key, self.base_key.as_slice()); 115 | } 116 | let h = Header { 117 | p_len: (key.len() - diff_key.len()) as u16, 118 | k_len: diff_key.len() as u16, 119 | v_len: (v.value.len() + ValueStruct::header_size()) as u16, 120 | prev: self.prev_offset, // prevOffset is the location of the last key-value added. 121 | }; 122 | // Remember current offset for the next Add call. 123 | self.prev_offset = self.buf.get_ref().len() as u32 - self.base_offset; 124 | 125 | // Layout: header, diff_key, value. 126 | self.buf 127 | .write_all(
>>::into(h).as_slice()) 128 | .unwrap(); 129 | self.buf.write_all(diff_key).unwrap(); 130 | self.buf 131 | .write_all(<&ValueStruct as Into>>::into(v).as_slice()) 132 | .unwrap(); 133 | // info!("insert a key-value: {:?}", String::from_utf8_lossy(key)); 134 | // Increment number of keys added for this current block. 135 | self.counter += 1; 136 | } 137 | 138 | // Add a key-value pair that indicates the end of a block. The key and value for this pair should both be empty. 139 | fn finish_block(&mut self) { 140 | // When we are at the end of the block and Valid=false, and the user wants to do a Prev, 141 | // we need a dummy header to tell us the offset of the previous key-value pair. 142 | self.add_helper(b"", &ValueStruct::default()); 143 | } 144 | 145 | /// Add adds a key-value pair to the block. 146 | /// If doNotRestart is true, we will not restart even if b.counter >= restartInterval. 147 | pub fn add(&mut self, key: &[u8], value: &ValueStruct) -> crate::y::Result<()> { 148 | if self.counter >= Self::RESTART_INTERVAL { 149 | self.finish_block(); 150 | // Start a new block. Initialize the block. 151 | self.restarts.push(self.buf.get_ref().len() as u32); 152 | self.counter = 0; 153 | self.base_key.clear(); 154 | self.base_offset = self.buf.get_ref().len() as u32; 155 | // First key-value pair of block has header.prev=MaxInt. 156 | self.prev_offset = u32::MAX; 157 | } 158 | self.add_helper(key, value); 159 | Ok(()) 160 | } 161 | 162 | // TODO: vvv this was the comment on ReachedCapacity. 163 | // FinalSize returns the *rough* final size of the array, counting the header which is not yet written. 164 | // TODO: Look into why there is a discrepancy. I suspect it is because of Write(empty, empty) 165 | // at the end. The diff can vary. 166 | // ReachedCapacity returns true if we... roughly (?) reached capacity? 167 | pub(crate) fn reached_capacity(&self, cap: u64) -> bool { 168 | let estimate_sz = 169 | self.buf.get_ref().len() + 8 /* empty header */ + 4*self.restarts.len() + 8; 170 | // 8 = end of buf offset + len(restarts). 171 | estimate_sz as u64 > cap 172 | } 173 | 174 | // blockIndex generates the block index for the table. 175 | // It is mainly a list of all the block base offsets. 176 | fn block_index(&mut self) -> Vec { 177 | // Store the end offset, so we know the length of the final block. 178 | self.restarts.push(self.buf.get_ref().len() as u32); 179 | 180 | // Add 4 because we want to write out number of restarts at the end. 181 | let sz = 4 * self.restarts.len() + 4; 182 | let mut wt = Cursor::new(vec![0u8; sz]); 183 | for restart in self.restarts.iter() { 184 | wt.write_u32::(*restart).unwrap(); 185 | } 186 | wt.write_u32::(self.restarts.len() as u32) 187 | .unwrap(); 188 | let out = wt.into_inner(); 189 | assert_eq!(out.len(), sz); 190 | out 191 | } 192 | 193 | /// Finishes the table by appending the index. 194 | /// TODO Hash should be calc with parallels. 195 | pub fn finish(&mut self) -> Vec { 196 | let start = SystemTime::now(); 197 | defer! 
{ 198 | let cost = SystemTime::now().duration_since(start).unwrap().as_millis() as u64; 199 | crate::event::get_metrics().block_hash_calc_cost.inc_by(cost); 200 | } 201 | let mut bf = GrowableBloom::new(0.01, self.counter); 202 | loop { 203 | let kl = self.key_buf.read_u16::(); 204 | if is_eof(&kl) { 205 | // No content, finish 206 | break; 207 | } 208 | // Other error, panic it 209 | if kl.is_err() { 210 | panic!("{:?}", &kl.unwrap_err()); 211 | } 212 | let kl = kl.unwrap(); 213 | let mut hash_buffer = vec![0u8; kl as usize]; 214 | self.key_buf.read(&mut hash_buffer).unwrap(); 215 | bf.insert(&hash(&hash_buffer)); 216 | } 217 | // This will never start a new block. 218 | self.finish_block(); 219 | let index = self.block_index(); 220 | self.buf.write_all(&index).unwrap(); 221 | 222 | // Write bloom filter 223 | let bdata = serde_json::to_vec(&bf).unwrap(); 224 | self.buf.write_all(&bdata).unwrap(); 225 | self.buf.write_u32::(bdata.len() as u32).unwrap(); 226 | self.buf.get_ref().clone() 227 | } 228 | } 229 | 230 | impl Default for Builder { 231 | fn default() -> Self { 232 | Self { 233 | counter: 0, 234 | buf: Cursor::new(Vec::with_capacity(64 << 20)), 235 | base_key: vec![], 236 | base_offset: 0, 237 | restarts: vec![], 238 | prev_offset: u32::MAX, 239 | key_buf: Cursor::new(Vec::with_capacity(32 << 20)), 240 | key_count: 0, 241 | } 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /src/table/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod builder; 2 | pub(crate) mod iterator; 3 | pub(crate) mod table; 4 | mod tests; 5 | -------------------------------------------------------------------------------- /src/table/table.rs: -------------------------------------------------------------------------------- 1 | use crate::options::FileLoadingMode; 2 | use crate::options::FileLoadingMode::MemoryMap; 3 | use crate::table::builder::Header; 4 | use crate::y::{hash, mmap, parallel_load_block_key, read_at, Result}; 5 | use crate::{event, hex_str, Error}; 6 | use byteorder::{BigEndian, ReadBytesExt}; 7 | 8 | use growable_bloom_filter::GrowableBloom; 9 | use memmap::MmapMut; 10 | 11 | use std::collections::HashSet; 12 | 13 | use std::fmt::{Debug, Display, Formatter}; 14 | use std::fs::{read_dir, remove_file, File}; 15 | use std::io::{Cursor, Seek, SeekFrom}; 16 | use std::path::Path; 17 | use std::sync::atomic::{AtomicI32, Ordering}; 18 | use std::{fmt, io}; 19 | 20 | #[cfg(target_os = "macos")] 21 | use std::os::unix::fs::FileExt; 22 | 23 | use crate::types::{XArc, XWeak}; 24 | use crate::y::iterator::Xiterator; 25 | 26 | use log::{debug, info, warn}; 27 | 28 | #[cfg(target_os = "windows")] 29 | use std::os::windows::fs::FileExt; 30 | 31 | use drop_cell::defer; 32 | use std::str::pattern::Pattern; 33 | 34 | pub(crate) const FILE_SUFFIX: &str = ".sst"; 35 | 36 | #[derive(Clone, Debug)] 37 | pub(crate) struct KeyOffset { 38 | pub(crate) key: Vec, 39 | offset: usize, 40 | len: usize, 41 | } 42 | 43 | impl Display for KeyOffset { 44 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 45 | f.debug_struct("KeyOffset") 46 | .field("key", &String::from_utf8_lossy(&self.key)) 47 | .field("offset", &self.offset) 48 | .field("len", &self.len) 49 | .finish() 50 | } 51 | } 52 | 53 | pub type Table = XArc; 54 | pub type WeakTable = XWeak; 55 | 56 | impl From for Table { 57 | fn from(value: TableCore) -> Self { 58 | Table::new(value) 59 | } 60 | } 61 | 62 | impl Table { 63 | pub fn incr_ref(&self) { 64 
| self.to_ref().incr_ref() 65 | } 66 | 67 | pub fn decr_ref(&self) { 68 | self.to_ref().decr_ref() 69 | } 70 | 71 | pub fn size(&self) -> usize { 72 | self.to_ref().size() 73 | } 74 | 75 | pub fn biggest(&self) -> &[u8] { 76 | &self.biggest 77 | } 78 | 79 | pub fn smallest(&self) -> &[u8] { 80 | &self.smallest 81 | } 82 | } 83 | 84 | pub struct TableCore { 85 | _ref: AtomicI32, 86 | fd: File, 87 | file_name: String, 88 | // Initialized in OpenTable, using fd.Stat() 89 | table_size: usize, 90 | pub(crate) block_index: Vec, 91 | loading_mode: FileLoadingMode, 92 | _mmap: Option, 93 | // Memory mapped. 94 | // The following are initialized once and const. 95 | smallest: Vec, 96 | // smallest keys. 97 | biggest: Vec, 98 | // biggest keys. 99 | id: u64, 100 | bf: GrowableBloom, 101 | } 102 | 103 | impl TableCore { 104 | // assumes file has only one table and opens it. Takes ownership of fd upon function 105 | // entry. Returns a table with one reference count on it (decrementing which may delete the file! 106 | // -- consider t.Close() instead). The fd has to writeable because we call Truncate on it before 107 | // deleting. 108 | pub(crate) fn open_table( 109 | mut fd: File, 110 | filename: &str, 111 | loading_mode: FileLoadingMode, 112 | ) -> Result { 113 | let file_sz = fd.seek(SeekFrom::End(0)).or_else(Err)?; 114 | fd.seek(SeekFrom::Start(0)).or_else(Err)?; 115 | let id = parse_file_id(filename)?; 116 | let mut table = TableCore { 117 | _ref: AtomicI32::new(1), 118 | fd, 119 | file_name: filename.to_string(), 120 | table_size: file_sz as usize, 121 | block_index: vec![], 122 | loading_mode, 123 | _mmap: None, 124 | smallest: vec![], 125 | biggest: vec![], 126 | id, 127 | bf: GrowableBloom::new(0.01, 1), 128 | }; 129 | 130 | #[cfg(any(target_os = "macos", target_os = "linux"))] 131 | if loading_mode == MemoryMap { 132 | table._mmap = Some(mmap(&table.fd, false, file_sz as usize)?); 133 | } else { 134 | table.load_to_ram()?; 135 | } 136 | 137 | #[cfg(any(target_os = "windows"))] 138 | { 139 | warn!("Windows OS only support load file to RAW!!!"); 140 | table.load_to_ram()?; 141 | } 142 | 143 | table.read_index()?; 144 | let table_ref = Table::new(table); 145 | let biggest = { 146 | let iter1 = super::iterator::IteratorImpl::new(table_ref.clone(), true); 147 | defer! {iter1.close()}; 148 | iter1 149 | .rewind() 150 | .map(|item| item.key().to_vec()) 151 | .or_else(|| Some(vec![])) 152 | } 153 | .unwrap(); 154 | 155 | let smallest = { 156 | let iter1 = super::iterator::IteratorImpl::new(table_ref.clone(), false); 157 | defer! 
{iter1.close()}; 158 | iter1 159 | .rewind() 160 | .map(|item| item.key().to_vec()) 161 | .or_else(|| Some(vec![])) 162 | } 163 | .unwrap(); 164 | let mut tc = table_ref.to_inner().unwrap(); 165 | tc.biggest = biggest; 166 | tc.smallest = smallest; 167 | // info!("open table ==> {}", tc); 168 | Ok(tc) 169 | } 170 | 171 | // increments the refcount (having to do with whether the file should be deleted) 172 | pub(crate) fn incr_ref(&self) { 173 | use std::backtrace::Backtrace; 174 | let count = self._ref.fetch_add(1, Ordering::Release); 175 | let buf = format!( 176 | "incr {} table count {} => {}", 177 | self.id, 178 | count, 179 | self.get_ref() 180 | ); 181 | // info!("{}", buf); 182 | // 183 | // info!( 184 | // "BackTrace at table incr reference: {}", 185 | // Backtrace::force_capture() 186 | // ); 187 | //push_log(buf.as_bytes(), false); 188 | } 189 | // decrements the refcount and possibly deletes the table 190 | pub(crate) fn decr_ref(&self) { 191 | let count = self._ref.fetch_sub(1, Ordering::Release); 192 | let buf = format!( 193 | "decr {} table count {} => {}", 194 | self.id, 195 | count, 196 | self.get_ref() 197 | ); 198 | //push_log(buf.as_bytes(), false); 199 | } 200 | 201 | pub(crate) fn get_ref(&self) -> i32 { 202 | self._ref.load(Ordering::Acquire) 203 | } 204 | } 205 | 206 | impl TableCore { 207 | fn read(&self, off: usize, sz: usize) -> Result> { 208 | if let Some(m) = self._mmap.as_ref() { 209 | if !m.is_empty() { 210 | if m[off..].len() < sz { 211 | return Err(Error::Io(io::ErrorKind::UnexpectedEof.to_string())); 212 | } 213 | return Ok(m[off..off + sz].to_vec()); 214 | } 215 | } 216 | event::get_metrics().num_reads.inc(); 217 | event::get_metrics().num_bytes_read.inc_by(sz as u64); 218 | let mut buffer = vec![0u8; sz]; 219 | read_at(&self.fd, &mut buffer, sz as u64)?; 220 | // todo add stats 221 | Ok(buffer) 222 | } 223 | 224 | fn read_no_fail(&self, off: usize, sz: usize) -> Vec { 225 | self.read(off, sz).unwrap() 226 | } 227 | 228 | // TODO maybe use &self 229 | fn read_index(&mut self) -> Result<()> { 230 | let mut read_pos = self.table_size; 231 | // Read bloom filter. 232 | read_pos -= 4; 233 | let buf = self.read_no_fail(read_pos, 4); 234 | let bloom_len = Cursor::new(buf).read_u32::().unwrap(); 235 | read_pos -= bloom_len as usize; 236 | let data = self.read_no_fail(read_pos, bloom_len as usize); 237 | self.bf = serde_json::from_slice(&data).unwrap(); 238 | 239 | read_pos -= 4; 240 | let restarts_len = Cursor::new(self.read_no_fail(read_pos, 4)) 241 | .read_u32::() 242 | .unwrap(); 243 | 244 | read_pos -= 4 * restarts_len as usize; 245 | let mut buf = Cursor::new(self.read_no_fail(read_pos, 4 * restarts_len as usize)); 246 | 247 | let mut offsets = vec![0u32; restarts_len as usize]; 248 | for i in 0..restarts_len as usize { 249 | offsets[i] = buf.read_u32::().unwrap(); 250 | } 251 | // The last offset stores the end of the last block. 
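        // At this point the footer has been consumed from the end of the file:
        // [blocks][restart offsets, 4 B each][restart count, 4 B][bloom JSON][bloom length, 4 B],
        // mirroring what Builder::finish() appends. The loop below turns the
        // consecutive restart offsets into (offset, len) pairs, i.e. block i
        // spans [offsets[i - 1], offsets[i]).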
252 | for i in 0..offsets.len() { 253 | let offset = { 254 | if i == 0 { 255 | 0 256 | } else { 257 | offsets[i - 1] 258 | } 259 | }; 260 | let index = KeyOffset { 261 | offset: offset as usize, 262 | len: (offsets[i] - offset) as usize, 263 | key: vec![], 264 | }; 265 | self.block_index.push(index); 266 | } 267 | // todo Why reload key 268 | if self.block_index.len() == 1 { 269 | return Ok(()); 270 | } 271 | 272 | if self._mmap.is_some() { 273 | for (i, block) in self.block_index.clone().iter().enumerate() { 274 | let buffer = self.read(block.offset, Header::size())?; 275 | let head = Header::from(buffer.as_slice()); 276 | assert_eq!( 277 | head.p_len, 0, 278 | "key offset: {}, h.p_len = {}", 279 | block.offset, head.p_len 280 | ); 281 | let out = self.read(Header::size() + block.offset, head.k_len as usize)?; 282 | self.block_index[i].key = out.clone().to_vec(); 283 | } 284 | } else { 285 | let fp = self.fd.try_clone().unwrap(); 286 | let offsets = self 287 | .block_index 288 | .iter() 289 | .map(|key_offset| key_offset.offset as u64) 290 | .collect::>(); 291 | let keys = parallel_load_block_key(fp, offsets); 292 | for i in 0..keys.len() { 293 | self.block_index[i].key = keys[i].to_vec(); 294 | } 295 | } 296 | self.block_index.sort_by(|a, b| a.key.cmp(&b.key)); 297 | Ok(()) 298 | } 299 | 300 | pub(crate) fn block(&self, index: usize) -> Result { 301 | if index >= self.block_index.len() { 302 | return Err("block out of index".into()); 303 | } 304 | let ko = &self.block_index[index]; 305 | let data = self.read(ko.offset, ko.len)?; 306 | Ok(Block { 307 | offset: ko.offset, 308 | data, 309 | }) 310 | } 311 | 312 | pub fn size(&self) -> usize { 313 | self.table_size 314 | } 315 | 316 | pub fn smallest(&self) -> &[u8] { 317 | &self.smallest 318 | } 319 | 320 | pub fn biggest(&self) -> &[u8] { 321 | &self.biggest 322 | } 323 | 324 | pub fn filename(&self) -> &String { 325 | &self.file_name 326 | } 327 | 328 | pub fn id(&self) -> u64 { 329 | self.id 330 | } 331 | 332 | /// Returns true if (but not "only if") the table does not have the key. It does a bloom filter lookup. 333 | pub fn does_not_have(&self, key: &[u8]) -> bool { 334 | let id = hash(key); 335 | self.bf.contains(&id) 336 | } 337 | 338 | /// load to ram that stored with mmap 339 | fn load_to_ram(&mut self) -> Result<()> { 340 | let mut _mmap = MmapMut::map_anon(self.table_size).unwrap(); 341 | let read = read_at(&self.fd, &mut _mmap, 0)?; 342 | if read != self.table_size { 343 | return Err(format!( 344 | "Unable to load file in memory, Table faile: {}", 345 | self.filename() 346 | ) 347 | .into()); 348 | } 349 | // todo stats 350 | self._mmap = Some(_mmap); 351 | event::get_metrics().num_reads.inc(); 352 | event::get_metrics().num_bytes_read.inc_by(read as u64); 353 | Ok(()) 354 | } 355 | } 356 | 357 | impl Drop for TableCore { 358 | fn drop(&mut self) { 359 | let _ref = self.get_ref(); 360 | // We can safely delete this file, because for all the current files, we always have 361 | // at least one reference pointing to them. 362 | #[cfg(any(target_os = "macos", target_os = "linux"))] 363 | if self.loading_mode == MemoryMap { 364 | self._mmap 365 | .as_mut() 366 | .unwrap() 367 | .flush() 368 | .expect("failed to mmap") 369 | } 370 | if _ref == 1 { 371 | let sz = self.fd.metadata().unwrap().len(); 372 | // It's necessary to delete windows files 373 | // This is very important to let the FS know that the file is deleted. 
374 | //#[cfg(not(test))] 375 | self.fd.set_len(0).expect("can not truncate file to 0"); 376 | //#[cfg(not(test))] 377 | remove_file(Path::new(&self.file_name)).expect("fail to remove file"); 378 | warn!( 379 | "Drop table: {}, sz:{}, reference: {}, disk: {}", 380 | self.id, sz, _ref, self.file_name 381 | ); 382 | } 383 | } 384 | } 385 | 386 | impl Display for TableCore { 387 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 388 | let index_str = self 389 | .block_index 390 | .iter() 391 | .map(|x| format!("{}", x)) 392 | .collect::>(); 393 | let smallest = hex_str(self.smallest()); 394 | let biggest = hex_str(self.biggest()); 395 | f.debug_struct("Table") 396 | //.field("block_index", &index_str) 397 | .field("_ref", &self._ref.load(Ordering::Relaxed)) 398 | .field("fname", &self.file_name) 399 | .field("size", &self.table_size) 400 | .field("smallest", &smallest) 401 | .field("biggest", &biggest) 402 | .finish() 403 | } 404 | } 405 | 406 | pub(crate) struct Block { 407 | offset: usize, 408 | pub(crate) data: Vec, 409 | } 410 | 411 | impl Debug for Block { 412 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 413 | f.debug_struct("Block") 414 | .field("offset", &self.offset) 415 | .field("len", &self.data.len()) 416 | .finish() 417 | } 418 | } 419 | 420 | type ByKey = Vec; 421 | 422 | pub fn get_id_map(dir: &str) -> HashSet { 423 | let dir = read_dir(dir).unwrap(); 424 | let mut ids = HashSet::new(); 425 | for el in dir { 426 | if el.is_err() { 427 | continue; 428 | } 429 | let dir_el = el.unwrap(); 430 | if dir_el.metadata().unwrap().is_dir() { 431 | continue; 432 | } 433 | let fid = parse_file_id(dir_el.file_name().to_str().unwrap()); 434 | if fid.is_err() { 435 | debug!("Skip file, {:?}", fid.unwrap_err()); 436 | continue; 437 | } 438 | debug!( 439 | "Find a id table, fid: {:?}, fname: {:?}", 440 | fid, 441 | dir_el.file_name() 442 | ); 443 | ids.insert(fid.unwrap()); 444 | } 445 | ids 446 | } 447 | 448 | pub fn parse_file_id(name: &str) -> Result { 449 | use std::str::pattern::Pattern; 450 | let path = Path::new(name); 451 | let filename = path.file_name().unwrap().to_str().unwrap(); 452 | if !FILE_SUFFIX.is_suffix_of(filename) { 453 | return Err(format!("invalid file {}", name).into()); 454 | } 455 | let name = filename.trim_end_matches(FILE_SUFFIX); 456 | name.parse::() 457 | .or_else(|err| Err(err.to_string().into())) 458 | } 459 | 460 | pub fn id_to_filename(id: u64) -> String { 461 | format!("{}{}", id, FILE_SUFFIX) 462 | } 463 | 464 | pub fn new_file_name(id: u64, dir: &str) -> String { 465 | Path::new(dir) 466 | .join(&id_to_filename(id)) 467 | .to_str() 468 | .unwrap() 469 | .to_string() 470 | } 471 | -------------------------------------------------------------------------------- /src/test_data/vlog_file.text: -------------------------------------------------------------------------------- 1 | This is a test data! 
2 | warning: `badger-rs` (lib test) generated 569 warnings (run `cargo fix --lib -p badger-rs --tests` to apply 289 suggestions) 3 | Finished test [unoptimized + debuginfo] target(s) in 2.53s 4 | warning: the following packages contain code that will be rejected by a future version of Rust: console_log v0.2.0 5 | note: to see what the problems were, use the option `--future-incompat-report`, or run `cargo report future-incompatibilities --id 277` 6 | Running unittests src/lib.rs (target/debug/deps/badger_rs-fef6c93f1008e2ae) -------------------------------------------------------------------------------- /src/test_util.rs: -------------------------------------------------------------------------------- 1 | use atomic::Atomic; 2 | use chrono::Local; 3 | use log::{info, kv::source::as_map, kv::Source, warn, Level}; 4 | use rand::random; 5 | use std::collections::HashMap; 6 | use std::env::temp_dir; 7 | use std::fs::create_dir_all; 8 | use std::io; 9 | use std::sync::atomic::{AtomicI32, AtomicU64, Ordering}; 10 | use std::sync::Arc; 11 | use std::time::Duration; 12 | use tokio::runtime::Handle; 13 | use tokio_metrics::TaskMonitor; 14 | use tracing_subscriber::fmt::format::Writer; 15 | use tracing_subscriber::fmt::time::FormatTime; 16 | use tracing_subscriber::EnvFilter; 17 | 18 | #[cfg(test)] 19 | pub fn push_log_by_filename(fpath: &str, buf: &[u8]) { 20 | use std::io::Write; 21 | let mut fp = std::fs::File::options() 22 | .write(true) 23 | .append(true) 24 | .create(true) 25 | .open(fpath) 26 | .unwrap(); 27 | fp.write_all(buf).unwrap(); 28 | fp.write_all(b"\n").unwrap(); 29 | } 30 | 31 | #[cfg(test)] 32 | pub fn push_log(buf: &[u8], rd: bool) { 33 | // #[cfg(test)] 34 | // return; 35 | use std::io::Write; 36 | let mut fpath = "raw_log.log"; 37 | let mut fp = if !rd { 38 | std::fs::File::options() 39 | .write(true) 40 | .append(true) 41 | .create(true) 42 | .open(fpath) 43 | .unwrap() 44 | } else { 45 | std::fs::File::options() 46 | .write(true) 47 | .append(true) 48 | .create(true) 49 | .open(random_tmp_dir() + "/" + fpath) 50 | .unwrap() 51 | }; 52 | fp.write_all(buf).unwrap(); 53 | fp.write_all(b"\n").unwrap(); 54 | } 55 | 56 | #[cfg(test)] 57 | pub fn remove_push_log() { 58 | use std::fs::remove_file; 59 | remove_file("raw_log.log"); 60 | } 61 | 62 | #[cfg(test)] 63 | pub(crate) fn tracing_log() { 64 | use libc::remove; 65 | use tracing::{info, Level}; 66 | use tracing_subscriber; 67 | struct LocalTimer; 68 | 69 | impl FormatTime for LocalTimer { 70 | fn format_time(&self, w: &mut Writer<'_>) -> std::fmt::Result { 71 | write!(w, "{}", Local::now().format("%T")) 72 | } 73 | } 74 | 75 | // let default_panic = std::panic::take_hook(); 76 | // std::panic::set_hook(Box::new(move |info| { 77 | // default_panic(info); 78 | // info!("panic info: {}", info); 79 | // std::fs::write("out.put", info.to_string()).expect("TODO: panic message"); 80 | // std::process::exit(1); 81 | // })); 82 | 83 | unsafe { backtrace_on_stack_overflow::enable() }; 84 | 85 | let format = tracing_subscriber::fmt::format() 86 | .with_thread_ids(true) 87 | .with_level(true) 88 | .with_target(true) 89 | .with_line_number(true) 90 | .with_timer(LocalTimer); 91 | 92 | let _ = tracing_subscriber::fmt() 93 | .with_env_filter(EnvFilter::from_default_env()) 94 | .with_writer(io::stdout) 95 | .with_ansi(true) 96 | .event_format(format) 97 | .try_init(); 98 | remove_push_log(); 99 | // let recorder = metrics_prometheus::install(); 100 | } 101 | 102 | #[cfg(test)] 103 | pub(crate) async fn start_metrics() -> TaskMonitor { 104 | let 
monitor = tokio_metrics::TaskMonitor::new(); 105 | // print task metrics every 500ms 106 | { 107 | let frequency = std::time::Duration::from_millis(500); 108 | let monitor = monitor.clone(); 109 | tokio::spawn(async move { 110 | for metrics in monitor.intervals() { 111 | warn!("{:?}", metrics); 112 | tokio::time::sleep(frequency).await; 113 | } 114 | }); 115 | } 116 | monitor 117 | } 118 | 119 | pub fn random_tmp_dir() -> String { 120 | let id = random::(); 121 | let path = temp_dir().join(id.to_string()).join("badger"); 122 | path.to_str().unwrap().to_string() 123 | } 124 | 125 | pub fn create_random_tmp_dir() -> String { 126 | let fpath = random_tmp_dir(); 127 | create_dir_all(&fpath).unwrap(); 128 | fpath 129 | } 130 | 131 | #[test] 132 | fn it_work() { 133 | #[tracing::instrument(skip_all)] 134 | fn call() { 135 | info!("call c"); 136 | } 137 | 138 | #[tracing::instrument(skip_all)] 139 | fn my_function(my_arg: usize) { 140 | info!("execute my function"); 141 | call(); 142 | } 143 | 144 | tracing_log(); 145 | my_function(1000); 146 | info!("Hello Body"); 147 | } 148 | 149 | #[tokio::test] 150 | async fn runtime_tk() { 151 | use tokio::{sync::RwLock, task::JoinHandle}; 152 | 153 | pub type Future = JoinHandle; 154 | pub type SafeFn = Arc Option + Sync + Send>>; 155 | pub struct SafeFnWrapper { 156 | fn_mut: SafeFn, 157 | } 158 | 159 | impl SafeFnWrapper { 160 | pub fn new(fn_mut: impl FnMut(A) -> Option + Send + Sync + 'static) -> SafeFnWrapper { 161 | SafeFnWrapper::set(Arc::new(RwLock::new(fn_mut))) 162 | } 163 | 164 | pub fn set(fn_mut: SafeFn) -> Self { 165 | Self { fn_mut } 166 | } 167 | 168 | /// Get a clone of the `fn_mut` field (which holds a thread safe `FnMut`). 169 | pub fn get(&self) -> SafeFn { 170 | self.fn_mut.clone() 171 | } 172 | 173 | /// This is an `async` function. Make sure to use `await` on the return value. 174 | pub fn spawn(&self, action: A) -> Future> { 175 | let arc_lock_fn_mut = self.get(); 176 | tokio::spawn(async move { 177 | // Delay before calling the function. 178 | // let delay_ms = rand::thread_rng().gen_range(100..1_000) as u64; 179 | tokio::time::sleep(tokio::time::Duration::from_millis(10)).await; 180 | let mut fn_mut = arc_lock_fn_mut.write().await; // 👀 `unwrap()` for blocking. 
181 | fn_mut(action) 182 | }) 183 | } 184 | } 185 | // 186 | fn load() -> impl FnMut(i32) -> Option { 187 | |_| { 188 | println!("HeLoo"); 189 | Some(299) 190 | } 191 | } 192 | } 193 | 194 | #[test] 195 | fn tk2() { 196 | let rt = tokio::runtime::Runtime::new().unwrap(); 197 | let a = Arc::new(std::sync::atomic::AtomicI32::new(10000000)); 198 | let ac = a.clone(); 199 | rt.block_on(async move { 200 | for i in 0..10000 { 201 | let ac = ac.clone(); 202 | tokio::spawn(async move { 203 | ac.fetch_sub(1, Ordering::Relaxed); 204 | }); 205 | } 206 | fn add() -> i32 { 207 | let f = async move { 100 }; 208 | let r = 209 | tokio::task::block_in_place(move || tokio::runtime::Handle::current().block_on(f)); 210 | r 211 | } 212 | 213 | let ret = add(); 214 | println!("return {}", ret); 215 | }); 216 | println!("{}", a.load(Ordering::Relaxed)); 217 | 218 | use itertools::Merge; 219 | } 220 | -------------------------------------------------------------------------------- /src/types.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use std::hint; 4 | use std::ops::{Deref, DerefMut, RangeBounds}; 5 | use std::sync::atomic::{AtomicIsize, Ordering}; 6 | use std::sync::{Arc, TryLockResult, Weak}; 7 | use std::time::Duration; 8 | 9 | use async_channel::{ 10 | bounded, unbounded, Receiver, RecvError, SendError, Sender, TryRecvError, TrySendError, 11 | }; 12 | 13 | use log::info; 14 | 15 | use range_lock::{VecRangeLock, VecRangeLockGuard}; 16 | 17 | use tokio::time::sleep; 18 | 19 | pub type TArcMx = Arc>; 20 | pub type TArcRW = Arc>; 21 | pub type ArcMx = Arc>; 22 | pub type ArcRW = Arc>; 23 | 24 | // Channel like to go's channel 25 | #[derive(Clone)] 26 | pub struct Channel { 27 | rx: Option>, 28 | tx: Option>, 29 | } 30 | 31 | impl Channel { 32 | /// create a *Channel* with n cap 33 | pub fn new(n: usize) -> Self { 34 | let (tx, rx) = bounded(n); 35 | Channel { 36 | rx: Some(rx), 37 | tx: Some(tx), 38 | } 39 | } 40 | 41 | /// try to send message T without blocking 42 | pub fn try_send(&self, msg: T) -> Result<(), TrySendError> { 43 | if let Some(tx) = &self.tx { 44 | return tx.try_send(msg); 45 | } 46 | Ok(()) 47 | } 48 | 49 | /// try to receive a message without blocking 50 | pub fn try_recv(&self) -> Result { 51 | if let Some(rx) = &self.rx { 52 | return rx.try_recv(); 53 | } 54 | Err(TryRecvError::Empty) 55 | } 56 | 57 | /// async receive a message with blocking 58 | pub async fn recv(&self) -> Result { 59 | let rx = self.rx.as_ref().unwrap(); 60 | rx.recv().await 61 | } 62 | 63 | /// async send a message with blocking 64 | pub async fn send(&self, msg: T) -> Result<(), SendError> { 65 | let tx = self.tx.as_ref().unwrap(); 66 | tx.send(msg).await 67 | } 68 | 69 | /// returns Sender 70 | pub fn tx(&self) -> Sender { 71 | self.tx.as_ref().unwrap().clone() 72 | } 73 | 74 | pub fn rx(&self) -> Receiver { 75 | self.rx.as_ref().unwrap().clone() 76 | } 77 | 78 | /// consume tx and return it if exist 79 | pub fn take_tx(&mut self) -> Option> { 80 | self.tx.take() 81 | } 82 | 83 | /// close *Channel*, Sender will be consumed 84 | pub fn close(&self) { 85 | if let Some(tx) = &self.tx { 86 | tx.close(); 87 | } 88 | } 89 | 90 | pub fn is_close(&self) -> bool { 91 | if let Some(tx) = &self.tx { 92 | return tx.is_closed(); 93 | } 94 | true 95 | } 96 | } 97 | 98 | #[derive(Clone)] 99 | pub struct UnChannel { 100 | rx: Option>, 101 | tx: Option>, 102 | } 103 | 104 | impl UnChannel { 105 | pub fn new() -> UnChannel { 106 | let (tx, rx) = unbounded(); 107 
| UnChannel { 108 | rx: Some(rx), 109 | tx: Some(tx), 110 | } 111 | } 112 | 113 | /// returns Sender 114 | pub fn tx(&self) -> Option<&Sender> { 115 | self.tx.as_ref() 116 | } 117 | 118 | // /// async receive a message with blocking 119 | pub async fn recv(&mut self) -> Result { 120 | let rx = self.rx.as_ref().unwrap(); 121 | rx.recv().await 122 | } 123 | 124 | /// close *Channel*, Sender will be consumed 125 | pub fn close(&self) { 126 | if let Some(tx) = &self.tx { 127 | tx.close(); 128 | } 129 | } 130 | } 131 | 132 | /// Holds the two things we need to close a routine and wait for it to finish: a chan 133 | /// to tell the routine to shut down, and a wait_group with which to wait for it to finish shutting 134 | /// down. 135 | #[derive(Clone)] 136 | pub struct Closer { 137 | name: String, 138 | closed: Channel<()>, 139 | wait: Arc, 140 | disable_log: bool, 141 | } 142 | 143 | impl Drop for Closer { 144 | fn drop(&mut self) { 145 | assert!(self.wait.load(Ordering::Relaxed) >= 0, "Sanity check!"); 146 | if !self.disable_log { 147 | info!( 148 | "Worker-{}-{} exited", 149 | self.name, 150 | self.wait.load(Ordering::Relaxed) 151 | ); 152 | } 153 | } 154 | } 155 | 156 | impl Closer { 157 | /// create a Closer with *initial* cap Workers 158 | pub fn new(name: String) -> Self { 159 | let close = Closer { 160 | name, 161 | closed: Channel::new(1), 162 | disable_log: false, 163 | wait: Arc::from(AtomicIsize::new(0)), 164 | }; 165 | close 166 | } 167 | 168 | pub fn new_without_log(name: String) -> Self { 169 | let mut closer = Self::new(name); 170 | closer.disable_log = true; 171 | closer 172 | } 173 | 174 | /// Incr delta to the WaitGroup. 175 | pub fn add_running(&self, delta: isize) { 176 | let old = self.wait.fetch_add(delta, Ordering::Relaxed); 177 | assert!(old >= 0, "Sanity check"); 178 | } 179 | 180 | /// Spawn a worker 181 | pub fn spawn(&self) -> Self { 182 | info!( 183 | "spawn a new closer: {}.{}.Worker", 184 | self.name, 185 | self.wait.load(Ordering::Relaxed) 186 | ); 187 | self.add_running(1); 188 | self.clone() 189 | } 190 | 191 | /// Decr delta to the WaitGroup(Note: must be call for every worker avoid leak). 192 | pub fn done(&self) { 193 | let old = self.wait.fetch_sub(1, Ordering::Relaxed); 194 | assert!(old >= 0, "Sanity check"); 195 | } 196 | 197 | /// Signals the `has_been_closed` signal. 198 | pub fn signal(&self) { 199 | self.closed.close(); 200 | } 201 | 202 | /// Gets signaled when signal() is called. 
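    // Typical lifecycle (see the it_closer test below): each worker is
    // registered with spawn()/add_running(1) and calls done() when it exits;
    // the owner calls signal() (or signal_and_wait()) to close this channel
    // and then waits for the running counter to reach zero.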
203 | pub fn has_been_closed(&self) -> Channel<()> { 204 | self.closed.clone() 205 | } 206 | 207 | /// Waiting until done 208 | pub async fn wait(&self) { 209 | loop { 210 | if self.wait.load(Ordering::Relaxed) <= 0 { 211 | break; 212 | } 213 | match self.has_been_closed().try_recv() { 214 | Err(err) if err.is_closed() => return, 215 | Err(_) => {} 216 | Ok(()) => return, 217 | } 218 | tokio::time::sleep(Duration::from_micros(1)).await; 219 | } 220 | } 221 | 222 | /// Send a close signal and waiting util done 223 | pub async fn signal_and_wait(&self) { 224 | self.signal(); 225 | loop { 226 | if self.wait.load(Ordering::Relaxed) <= 0 { 227 | break; 228 | } 229 | sleep(Duration::from_nanos(1000)).await; 230 | } 231 | } 232 | } 233 | 234 | #[derive(Debug, Clone)] 235 | pub struct XWeak(Weak); 236 | 237 | #[derive(Debug)] 238 | pub struct XArc(Arc); 239 | 240 | impl Deref for XArc { 241 | type Target = T; 242 | 243 | fn deref(&self) -> &Self::Target { 244 | self.0.deref() 245 | } 246 | } 247 | 248 | impl Clone for XArc { 249 | fn clone(&self) -> Self { 250 | XArc(self.0.clone()) 251 | } 252 | } 253 | 254 | impl XArc { 255 | pub fn new(x: T) -> XArc { 256 | XArc(Arc::new(x)) 257 | } 258 | 259 | pub fn to_ref(&self) -> &T { 260 | self.0.as_ref() 261 | } 262 | 263 | pub fn to_inner(self) -> Option { 264 | Arc::into_inner(self.0) 265 | } 266 | } 267 | 268 | impl XWeak { 269 | pub fn new() -> Self { 270 | Self { 0: Weak::new() } 271 | } 272 | 273 | pub fn upgrade(&self) -> Option> { 274 | self.0.upgrade().map(XArc) 275 | } 276 | 277 | pub fn from(xarc: &XArc) -> Self { 278 | Self { 279 | 0: Arc::downgrade(&xarc.0), 280 | } 281 | } 282 | } 283 | 284 | #[derive(Clone)] 285 | pub struct XVec(pub Arc>); 286 | 287 | impl XVec { 288 | pub fn new(v: Vec) -> Self { 289 | XVec(Arc::new(VecRangeLock::new(v))) 290 | } 291 | 292 | #[inline] 293 | pub fn lock_all(&self) { 294 | let right = self.0.data_len(); 295 | self.lock(0, right) 296 | } 297 | 298 | #[inline] 299 | pub fn lock(&self, left: usize, right: usize) { 300 | loop { 301 | let range = left..right; 302 | if self.0.try_lock(range).is_ok() { 303 | break; 304 | } else { 305 | hint::spin_loop(); 306 | } 307 | } 308 | } 309 | 310 | #[inline] 311 | pub fn try_lock(&self, range: impl RangeBounds) -> TryLockResult> { 312 | self.0.try_lock(range) 313 | } 314 | } 315 | 316 | impl Deref for XVec { 317 | type Target = VecRangeLock; 318 | fn deref(&self) -> &Self::Target { 319 | &self.0 320 | } 321 | } 322 | 323 | #[cfg(test)] 324 | mod tests { 325 | use std::sync::{atomic::AtomicUsize, Arc}; 326 | 327 | use atomic::Ordering; 328 | use crossbeam_epoch::Owned; 329 | 330 | use crate::types::Closer; 331 | 332 | #[test] 333 | fn it_closer() { 334 | let runtime = tokio::runtime::Runtime::new().unwrap(); 335 | runtime.block_on(async { 336 | let closer = Closer::new("test".to_owned()); 337 | let count = Arc::new(AtomicUsize::new(100)); 338 | for i in 0..count.load(Ordering::Relaxed) { 339 | let c = closer.spawn(); 340 | let n = count.clone(); 341 | tokio::spawn(async move { 342 | n.fetch_sub(1, Ordering::Relaxed); 343 | c.done(); 344 | }); 345 | } 346 | closer.signal_and_wait().await; 347 | assert_eq!(count.load(Ordering::Relaxed), 0); 348 | }); 349 | } 350 | 351 | #[tokio::test] 352 | async fn lck() { 353 | use crossbeam_epoch::{self as epoch, Atomic, Shared}; 354 | use std::sync::atomic::Ordering::SeqCst; 355 | 356 | let a = Atomic::new(1234); 357 | let guard = &epoch::pin(); 358 | // let p = a.swap(Shared::null(), SeqCst, guard); 359 | // println!("{:?}", unsafe { 
p.as_ref().unwrap()}); 360 | let p = a.swap(Owned::new(200), SeqCst, guard); 361 | let p = a.swap(Owned::new(200), SeqCst, guard); 362 | 363 | println!("{:?}", unsafe { p.as_ref().unwrap() }); 364 | } 365 | } 366 | -------------------------------------------------------------------------------- /src/value_log_tests.rs: -------------------------------------------------------------------------------- 1 | use crate::kv::KVCore; 2 | use crate::options::Options; 3 | use crate::value_log::{Entry, MetaBit, Request}; 4 | use awaitgroup::WaitGroup; 5 | use std::cell::RefCell; 6 | use std::env::temp_dir; 7 | use std::fs; 8 | use std::sync::Arc; 9 | 10 | fn new_test_options(dir: String) -> Options { 11 | let mut opt = Options::default(); 12 | opt.max_table_size = 1 << 15; // Force more compaction 13 | opt.level_one_size = 4 << 15; // Force more compaction. 14 | opt.dir = Box::new(dir.clone()); 15 | opt.value_dir = Box::new(dir.clone()); 16 | return opt; 17 | } 18 | 19 | // #[test] 20 | // fn value_basic() { 21 | // mock_log_terminal(); 22 | // let dir = random_tmp_dir(); 23 | // println!("{}", dir); 24 | // let mut kv = KV::new(new_test_options(dir)).unwrap(); 25 | // // Use value big enough that the value log writes them even if SyncWrites is false. 26 | // let val1 = b"sampleval012345678901234567890123"; 27 | // let val2 = b"samplevalb012345678901234567890123"; 28 | // assert!(val1.len() >= kv.opt.value_threshold); 29 | // 30 | // let entry1 = Entry { 31 | // key: b"samplekey".to_vec(), 32 | // value: val1.to_vec(), 33 | // meta: MetaBit::BitValuePointer.bits(), 34 | // cas_counter_check: 22222, 35 | // cas_counter: 33333, 36 | // offset: 0, 37 | // user_meta: 0, 38 | // }; 39 | // let entry2 = Entry { 40 | // key: b"samplekeyb".to_vec(), 41 | // value: val2.to_vec(), 42 | // meta: MetaBit::BitValuePointer.bits(), 43 | // cas_counter_check: 22225, 44 | // cas_counter: 33335, 45 | // offset: 0, 46 | // user_meta: 0, 47 | // }; 48 | // 49 | // let mut wait = WaitGroup::new(); 50 | // let b = Request { 51 | // entries: vec![RefCell::new(entry1), RefCell::new(entry2)], 52 | // ptrs: RefCell::new(vec![]), 53 | // wait_group: RefCell::new(Some(wait.worker())), 54 | // err: RefCell::new(Arc::new(Ok(()))), 55 | // }; 56 | // // todo add event stats 57 | // 58 | // kv.must_mut_vlog() 59 | // .write(&vec![b]) 60 | // .expect("TODO: panic message"); 61 | // } 62 | -------------------------------------------------------------------------------- /src/y/codec.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use std::io::{Read, Write}; 3 | 4 | use crate::Result; 5 | 6 | pub trait Encode { 7 | fn enc(&self, wt: &mut dyn Write) -> Result; 8 | } 9 | 10 | pub trait Decode { 11 | fn dec(&mut self, rd: &mut dyn Read) -> Result<()>; 12 | } 13 | use tokio::io::{AsyncRead, AsyncWrite}; 14 | 15 | #[async_trait] 16 | pub trait AsyncEncDec 17 | where 18 | R: AsyncRead, 19 | W: AsyncWrite, 20 | { 21 | async fn enc(&self, wt: &mut W) -> Result; 22 | async fn dec(&mut self, rd: &R) -> Result<()>; 23 | } 24 | -------------------------------------------------------------------------------- /src/y/iterator.rs: -------------------------------------------------------------------------------- 1 | use byteorder::BigEndian; 2 | use byteorder::{ReadBytesExt, WriteBytesExt}; 3 | use log::info; 4 | 5 | use serde::{Deserialize, Serialize}; 6 | use std::io::{Cursor, Write}; 7 | 8 | /// ValueStruct represents the value info that can be associated with a key, but also 
the internal
9 | /// Meta field.
10 | /// Serialized layout: |meta|user_meta|cas_counter|value| (a 10-byte header followed by the value bytes).
11 | #[derive(Debug, Clone, Default, PartialEq, Serialize, Deserialize)]
12 | #[repr(C)]
13 | pub struct ValueStruct {
14 | pub(crate) meta: u8,
15 | pub(crate) user_meta: u8,
16 | pub(crate) cas_counter: u64,
17 | pub(crate) value: Vec<u8>,
18 | }
19 |
20 | impl ValueStruct {
21 | pub(crate) fn new(value: Vec<u8>, meta: u8, user_meta: u8, cas_counter: u64) -> ValueStruct {
22 | ValueStruct {
23 | meta,
24 | user_meta,
25 | cas_counter,
26 | value,
27 | }
28 | }
29 | pub(crate) const fn header_size() -> usize {
30 | 10
31 | }
32 |
33 | pub(crate) fn size(&self) -> usize {
34 | Self::header_size() + self.value.len()
35 | }
36 |
37 | pub(crate) fn write_data(&self, buffer: &mut [u8]) {
38 | use std::io::Write;
39 | let mut cursor = Cursor::new(buffer);
40 | cursor.write_u8(self.meta).unwrap();
41 | cursor.write_u8(self.user_meta).unwrap();
42 | cursor.write_u64::<BigEndian>(self.cas_counter).unwrap();
43 | cursor.write_all(&self.value).unwrap();
44 | }
45 |
46 | pub(crate) fn read_data(&mut self, buffer: &[u8]) {
47 | let mut cursor = Cursor::new(buffer);
48 | self.meta = cursor.read_u8().unwrap();
49 | self.user_meta = cursor.read_u8().unwrap();
50 | self.cas_counter = cursor.read_u64::<BigEndian>().unwrap();
51 | self.value.extend_from_slice(&buffer[Self::header_size()..]);
52 | }
53 |
54 | #[cfg(test)]
55 | pub(crate) fn pretty(&self) -> String {
56 | use crate::hex_str;
57 | format!(
58 | "meta: {}, user_meta: {}, cas: {}, value: {}",
59 | self.meta,
60 | self.user_meta,
61 | self.cas_counter,
62 | hex_str(&self.value)
63 | )
64 | }
65 | }
66 |
67 | impl<T> From<T> for ValueStruct
68 | where
69 | T: AsRef<[u8]>,
70 | {
71 | fn from(buffer: T) -> Self {
72 | let mut v = ValueStruct::default();
73 | v.read_data(buffer.as_ref());
74 | v
75 | }
76 | }
77 |
78 | impl Into<Vec<u8>> for &ValueStruct {
79 | fn into(self) -> Vec<u8> {
80 | let mut buffer = vec![0; self.size()];
81 | self.write_data(&mut buffer);
82 | buffer
83 | }
84 | }
85 |
86 | /// An iterator trait.
87 | pub trait Xiterator {
88 | /// The iterator element type.
89 | type Output;
90 | /// Same as `std::iter::Iterator::next`.
91 | fn next(&self) -> Option<Self::Output>;
92 | // Same as std's `rev` (not implemented yet).
93 | // fn rev(&self) -> Option<Self::Output> {
94 | // todo!()
95 | // }
96 | /// Seeks to the first element (or the last element for a reverse iterator).
97 | fn rewind(&self) -> Option<Self::Output>;
98 | /// Seek to `key`; returns the element whose key is >= `key` (or <= `key` for a reverse iterator).
99 | fn seek(&self, key: &[u8]) -> Option<Self::Output>;
100 | /// Peek the current element.
101 | fn peek(&self) -> Option<Self::Output> {
102 | todo!()
103 | }
104 | /// The iterator identity (used for logging).
105 | fn id(&self) -> String {
106 | return "unknown_id".to_owned();
107 | }
108 |
109 | /// Close the iterator.
110 | fn close(&self) {
111 | info!("close the iterator: {}", self.id());
112 | }
113 | }
114 |
115 | pub trait KeyValue<V> {
116 | fn key(&self) -> &[u8];
117 | fn value(&self) -> V;
118 | }
119 |
120 | // impl<T> Iterator for dyn Xiterator<Output = T> {
121 | // type Item = T;
122 | //
123 | // fn next(&mut self) -> Option<Self::Item> {
124 | // todo!()
125 | // }
126 | // }
127 |
--------------------------------------------------------------------------------
/src/y/metrics.rs:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/src/y/mod.rs:
--------------------------------------------------------------------------------
1 | mod codec;
2 | pub(crate) mod iterator;
3 | pub mod merge_iterator;
4 | mod metrics;
5 |
6 | pub use codec::{AsyncEncDec, Decode, Encode};
7 | pub use iterator::*;
8 | #[cfg(any(target_os = "macos", target_os = "linux"))]
9 | use libc::O_DSYNC;
10 | use log::error;
11 | use memmap::MmapMut;
12 | pub use merge_iterator::*;
13 | use std::cmp::Ordering;
14 | use std::collections::hash_map::DefaultHasher;
15 |
16 | use std::fs::{File, OpenOptions};
17 | use std::hash::Hasher;
18 | use std::io::{ErrorKind, Write};
19 |
20 | use std::backtrace::Backtrace;
21 | use std::{array, cmp, io};
22 | use thiserror::Error;
23 | use tracing::info;
24 |
25 | #[cfg(any(target_os = "windows"))]
26 | use winapi::um::winbase;
27 |
28 | pub const EMPTY_SLICE: Vec<u8> = vec![];
29 |
30 | /// Constants used for field sizes in ValueStruct serialization.
31 | pub const META_SIZE: usize = 1;
32 | pub const USER_META_SIZE: usize = 1;
33 | pub const CAS_SIZE: usize = 8;
34 | pub const VALUE_SIZE: usize = 4;
35 |
36 | #[derive(Debug, Error, Clone)]
37 | pub enum Error {
38 | #[error(transparent)]
39 | StdIO(#[from] eieio::Error),
40 |
41 | #[error("io error: {0}")]
42 | Io(String),
43 | #[error("{0}")]
44 | Unexpected(String),
45 |
46 | /// Returned when a log file containing the value is not found.
47 | /// This usually indicates that it may have been garbage collected, and the
48 | /// operation needs to be retried.
49 | #[error("Unable to find log file. Please retry")]
50 | ValueRetry,
51 | /// Returned when a CompareAndSet operation has failed due
52 | /// to a counter mismatch.
53 | #[error("CompareAndSet failed due to counter mismatch")]
54 | ValueCasMisMatch,
55 | /// Returned when SetIfAbsent is called and the key already exists.
56 | #[error("SetIfAbsent failed since key already exists")]
57 | ValueKeyExists,
58 | /// Returned if threshold is set to zero, and value log GC is called.
59 | /// In such a case, GC can't be run.
60 | #[error("Value log GC can't run because threshold is set to zero")]
61 | ValueThresholdZero,
62 | /// Returned if a call for value log GC doesn't result in a log file rewrite.
63 | #[error("Value log GC attempt didn't result in any cleanup")]
64 | ValueNoRewrite,
65 | /// Returned if a value log GC is called either while another GC is running, or
66 | /// after KV::Close has been called.
67 | #[error("Value log GC request rejected")]
68 | ValueRejected,
69 | /// Returned if the user request is invalid.
70 | #[error("Invalid request")] 71 | ValueInvalidRequest, 72 | #[error("Invalid Dir, directory does not exist")] 73 | InValidDir, 74 | #[error("Invalid ValueLogFileSize, must be between 1MB and 2GB")] 75 | ValueLogSize, 76 | 77 | ////////////////////////////////// 78 | // valueLog error 79 | ///////////// 80 | #[error("Too few bytes read")] 81 | TooFewBytes, 82 | /// Indicates an end of file then trying to read from a memory mapped file 83 | /// and encountering the end of slice. 84 | #[error("End of mapped region")] 85 | EOF, 86 | #[error("Manifest has bad magic")] 87 | BadMagic, 88 | ///////////////////////////////// 89 | #[error("Not found")] 90 | NotFound, 91 | //////////////////////////////// 92 | // GC 93 | #[error("Stop iteration")] 94 | StopGC, 95 | } 96 | 97 | impl Default for Error { 98 | fn default() -> Self { 99 | Self::Unexpected("".into()) 100 | } 101 | } 102 | 103 | impl Error { 104 | pub fn is_io(&self) -> bool { 105 | match self { 106 | Error::StdIO(_err) => true, 107 | _ => false, 108 | } 109 | } 110 | 111 | pub fn is_io_eof(&self) -> bool { 112 | match self { 113 | Error::StdIO(err) if err.kind() == ErrorKind::UnexpectedEof => true, 114 | _ => false, 115 | } 116 | } 117 | 118 | pub fn is_io_existing(&self) -> bool { 119 | match self { 120 | Error::StdIO(err) => { 121 | if err.kind() == io::ErrorKind::AlreadyExists { 122 | return true; 123 | } 124 | if let Some(code) = err.raw_os_error() { 125 | return code == 2; 126 | } 127 | false 128 | } 129 | _ => false, 130 | } 131 | } 132 | 133 | pub(crate) fn is_io_notfound(&self) -> bool { 134 | match self { 135 | Error::StdIO(err) if err.kind() == ErrorKind::NotFound => true, 136 | _ => false, 137 | } 138 | } 139 | 140 | pub fn is_not_found(&self) -> bool { 141 | match self { 142 | Error::NotFound => true, 143 | _ => false, 144 | } 145 | } 146 | 147 | pub fn is_exists(&self) -> bool { 148 | match self { 149 | Error::ValueKeyExists => true, 150 | _ => false, 151 | } 152 | } 153 | } 154 | 155 | impl From<&'static str> for Error { 156 | #[inline] 157 | fn from(s: &'static str) -> Self { 158 | Self::Unexpected(s.to_string()) 159 | } 160 | } 161 | 162 | impl From for Error { 163 | #[inline] 164 | fn from(s: String) -> Self { 165 | Self::Unexpected(s) 166 | } 167 | } 168 | 169 | impl From for Error { 170 | fn from(value: io::Error) -> Self { 171 | Error::StdIO(eieio::Error::from(value)) 172 | } 173 | } 174 | 175 | pub type Result = std::result::Result; 176 | 177 | #[inline] 178 | pub fn is_eof(ret: &io::Result) -> bool { 179 | if ret.is_ok() { 180 | return false; 181 | } 182 | match ret { 183 | Err(err) if err.kind() == ErrorKind::UnexpectedEof => true, 184 | _ => false, 185 | } 186 | } 187 | 188 | #[inline] 189 | pub fn is_existing(ret: &io::Result) -> bool { 190 | if ret.is_ok() { 191 | return false; 192 | } 193 | match ret { 194 | Err(err) if err.kind() == ErrorKind::AlreadyExists => true, 195 | _ => false, 196 | } 197 | } 198 | 199 | // TODO add SIMD hash 200 | #[inline] 201 | pub fn hash(buffer: &[u8]) -> u64 { 202 | let mut hasher = DefaultHasher::default(); 203 | hasher.write(buffer); 204 | hasher.finish() 205 | } 206 | 207 | pub fn mmap(fd: &File, _writable: bool, size: usize) -> Result { 208 | let m = unsafe { 209 | memmap::MmapOptions::new() 210 | .offset(0) 211 | .len(size) 212 | .map_mut(fd) 213 | .map_err(|_| "Failed to mmap")? 
214 | }; 215 | Ok(m) 216 | } 217 | 218 | pub fn open_synced_file(file_name: &str, _sync: bool) -> Result { 219 | let file = File::options() 220 | .write(true) 221 | .read(true) 222 | .create(true) 223 | .append(true) 224 | .open(file_name) 225 | .or_else(Err)?; 226 | Ok(file) 227 | } 228 | 229 | #[cfg(any(target_os = "macos", target_os = "linux"))] 230 | pub(crate) fn read_at(fp: &File, buffer: &mut [u8], offset: u64) -> Result { 231 | use std::os::unix::fs::FileExt; 232 | fp.read_at(buffer, offset).map_err(|err| err.into()) 233 | } 234 | 235 | #[cfg(target_os = "windows")] 236 | pub(crate) fn read_at(fp: &File, buffer: &mut [u8], offset: u64) -> Result { 237 | use std::os::windows::fs::FileExt; 238 | fp.seek_read(buffer, offset).map_err(|err| err.into()) 239 | } 240 | 241 | pub(crate) fn num_cpu() -> usize { 242 | let n = num_cpus::get(); 243 | n 244 | } 245 | 246 | // todo add error 247 | pub(crate) fn parallel_load_block_key(fp: File, offsets: Vec) -> Vec> { 248 | use crate::table::builder::Header; 249 | use std::sync::mpsc::sync_channel; 250 | use threads_pool::*; 251 | let (tx, rx) = sync_channel(offsets.len()); 252 | let num = num_cpu(); 253 | let mut pool = ThreadPool::new(num); 254 | for (i, offset) in offsets.iter().enumerate() { 255 | let offset = *offset; 256 | let fp = fp.try_clone().unwrap(); 257 | let tx = tx.clone(); 258 | pool.execute(move || { 259 | let mut buffer = vec![0u8; Header::size()]; 260 | read_at(&fp, &mut buffer, offset).unwrap(); 261 | let head = Header::from(buffer.as_slice()); 262 | assert_eq!( 263 | head.p_len, 0, 264 | "key offset: {}, h.p_len = {}", 265 | offset, head.p_len 266 | ); 267 | let out = vec![0u8; head.k_len as usize]; 268 | read_at(&fp, &mut buffer, offset + Header::size() as u64).unwrap(); 269 | tx.send((i, out)).unwrap(); 270 | }) 271 | .unwrap(); 272 | } 273 | pool.close(); 274 | 275 | let mut keys = vec![vec![0u8]; offsets.len()]; 276 | for _ in 0..offsets.len() { 277 | let (i, key) = rx.recv().unwrap(); 278 | keys[i] = key; 279 | } 280 | drop(tx); 281 | keys 282 | } 283 | 284 | pub(crate) fn slice_cmp_gte(a: &[u8], b: &[u8]) -> cmp::Ordering { 285 | match a.cmp(&b) { 286 | cmp::Ordering::Less => cmp::Ordering::Less, 287 | cmp::Ordering::Greater => cmp::Ordering::Equal, 288 | cmp::Ordering::Equal => cmp::Ordering::Equal, 289 | } 290 | } 291 | 292 | #[cfg(any(target_os = "macos", target_os = "linux"))] 293 | pub(crate) fn open_existing_synced_file(file_name: &str, synced: bool) -> Result { 294 | use std::os::unix::fs::OpenOptionsExt; 295 | if synced { 296 | File::options() 297 | .write(true) 298 | .read(true) 299 | .custom_flags(O_DSYNC) 300 | .open(file_name) 301 | .map_err(|err| err.into()) 302 | } else { 303 | File::options() 304 | .write(true) 305 | .read(true) 306 | .open(file_name) 307 | .map_err(|err| err.into()) 308 | } 309 | } 310 | 311 | #[cfg(any(target_os = "windows"))] 312 | pub(crate) fn open_existing_synced_file(file_name: &str, synced: bool) -> Result { 313 | use std::fs::OpenOptions; 314 | use std::os::windows::prelude::*; 315 | use winapi::um::winbase; 316 | if synced { 317 | File::options() 318 | .write(true) 319 | .read(true) 320 | // .custom_flags(winbase::FILE_FLAG_WRITE_THROUGH) 321 | .open(file_name) 322 | .map_err(|err| err.into()) 323 | } else { 324 | File::options() 325 | .write(true) 326 | .read(true) 327 | .open(file_name) 328 | .map_err(|err| err.into()) 329 | } 330 | } 331 | 332 | pub(crate) fn create_synced_file(file_name: &str, _synce: bool) -> Result { 333 | OpenOptions::new() 334 | .write(true) 335 | 
.read(true)
336 | .create(true)
337 | .append(true)
338 | .open(file_name)
339 | .map_err(|err| err.into())
340 | }
341 |
342 | pub(crate) fn async_create_synced_file(file_name: &str, synced: bool) -> Result<tokio::fs::File> {
343 | let fp = create_synced_file(file_name, synced)?;
344 | Ok(tokio::fs::File::from_std(fp))
345 | }
346 |
347 | pub(crate) fn sync_directory(d: &str) -> Result<()> {
348 | let fp = File::open(d)?;
349 | fp.sync_all().map_err(|err| err.into())
350 | }
351 |
352 | pub(crate) async fn async_sync_directory(d: String) -> Result<()> {
353 | let fp = tokio::fs::File::open(d).await?;
354 | fp.sync_all().await?;
355 | Ok(())
356 | }
357 |
358 | pub(crate) fn hex_str(buf: &[u8]) -> String {
359 | String::from_utf8(buf.to_vec()).unwrap_or_else(|_| "Sorry, Hex String Failed!!!".to_string())
360 | }
361 |
362 | #[cfg(any(target_os = "macos", target_os = "linux"))]
363 | #[test]
364 | fn dsync() {
365 | use std::fs::OpenOptions;
366 | use std::os::unix::fs::OpenOptionsExt;
367 |
368 | let mut options = OpenOptions::new();
369 | options.write(true);
370 |
371 | options.custom_flags(libc::O_WRONLY);
372 | let file = options.open("foo.txt");
373 | println!("{:?}", file.err());
374 | }
375 |
376 | /// Find a value in a sorted slice with binary search, using `f` as the comparator; returns the matching index, if any.
377 | pub fn binary_search<T, F>(array: &[T], f: F) -> Option<usize>
378 | where
379 | F: Fn(&T) -> Ordering,
380 | {
381 | let mut low = 0;
382 | let mut high = array.len().checked_sub(1)?;
383 | while low <= high {
384 | let mid = (low + high) / 2;
385 | match f(&array[mid]) {
386 | Ordering::Equal => return Some(mid),
387 | Ordering::Less => {
388 | low = mid + 1;
389 | }
390 | Ordering::Greater => {
391 | if mid == 0 {
392 | break;
393 | }
394 | high = mid - 1;
395 | }
396 | }
397 | }
398 |
399 | None
400 | }
401 |
402 | #[test]
403 | fn print_backtrace() {
404 | let buffer = Backtrace::force_capture();
405 | let mut frames = buffer.frames();
406 | if frames.len() > 5 {
407 | frames = &frames[0..5];
408 | }
409 | for frame in frames {
410 | info!("{:?}", frame)
411 | }
412 | }
413 |
414 | #[test]
415 | fn binary_search_test() {
416 | let v = &[1, 2, 3, 4, 5];
417 | for t in v {
418 | let ok = binary_search(v, |v| v.cmp(t)).unwrap();
419 | assert!(v[ok].eq(t));
420 | }
421 | for t in &[0, 6, 7] {
422 | let ok = binary_search(v, |v| v.cmp(t));
423 | assert!(ok.is_none());
424 | }
425 | }
426 |
--------------------------------------------------------------------------------
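
The generic type parameters on the `Channel` wrapper in src/types.rs appear to have been stripped in this dump (e.g. `Option<&Sender>`, `-> Result {`). The calls that survive (`Channel::new(1)`, `tx()`, `recv()`, `try_recv()` with `is_closed()`, `close()`) are consistent with a thin wrapper over `async_channel`; a minimal sketch under that assumption, not the crate's exact definition, looks like this:

    use async_channel::{bounded, Receiver, RecvError, Sender, TryRecvError};

    /// Sketch only: the concrete definition in types.rs may differ.
    #[derive(Clone)]
    pub struct Channel<T> {
        tx: Option<Sender<T>>,
        rx: Option<Receiver<T>>,
    }

    impl<T> Channel<T> {
        /// A bounded channel with capacity `cap` (Closer uses `Channel::new(1)`).
        pub fn new(cap: usize) -> Self {
            let (tx, rx) = bounded(cap);
            Channel { tx: Some(tx), rx: Some(rx) }
        }

        pub fn tx(&self) -> Option<&Sender<T>> {
            self.tx.as_ref()
        }

        /// Await the next message; fails once the channel is closed and drained.
        pub async fn recv(&mut self) -> Result<T, RecvError> {
            self.rx.as_ref().unwrap().recv().await
        }

        /// Non-blocking receive, as used by Closer::wait.
        pub fn try_recv(&self) -> Result<T, TryRecvError> {
            self.rx.as_ref().unwrap().try_recv()
        }

        /// Close the channel so pending senders fail and receivers can drain.
        pub fn close(&self) {
            if let Some(tx) = &self.tx {
                tx.close();
            }
        }
    }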
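
The `it_closer` test only exercises workers that finish immediately. A sketch of the intended shutdown protocol for a long-running worker, built from the methods shown above (`spawn`, `has_been_closed`, `done`, `signal_and_wait`), might look like the following; the worker body and sleep interval are illustrative:

    async fn run_workers() {
        let closer = Closer::new("compactor".to_owned());
        for _ in 0..4 {
            let worker = closer.spawn(); // increments the running counter
            tokio::spawn(async move {
                loop {
                    // ... do one unit of background work here ...
                    match worker.has_been_closed().try_recv() {
                        Ok(()) => break,                      // explicit signal received
                        Err(err) if err.is_closed() => break, // channel closed by signal()
                        Err(_) => tokio::time::sleep(std::time::Duration::from_millis(10)).await,
                    }
                }
                worker.done(); // must be called, otherwise signal_and_wait() never returns
            });
        }
        // Ask all workers to stop, then wait for every done() call.
        closer.signal_and_wait().await;
    }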
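
A small usage sketch for the `XArc`/`XWeak` pair: a weak handle can be upgraded back to a strong one for as long as at least one `XArc` is alive, and `to_inner` only succeeds for the last strong reference.

    let strong = XArc::new(String::from("level0"));
    let weak = XWeak::from(&strong);
    assert!(weak.upgrade().is_some());

    // Consumes the only remaining strong reference and returns the inner value.
    assert_eq!(strong.to_inner(), Some(String::from("level0")));

    // With no strong references left, the weak handle can no longer upgrade.
    assert!(weak.upgrade().is_none());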
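
Because the `ValueStruct` header encoding is easy to get wrong, a round-trip check (written inside the crate, where the `pub(crate)` items are visible) is a useful illustration of the 10-byte layout |meta:1|user_meta:1|cas_counter:8| followed by the raw value bytes:

    #[test]
    fn value_struct_round_trip() {
        let v = ValueStruct::new(b"hello".to_vec(), 1, 0, 42);
        assert_eq!(v.size(), ValueStruct::header_size() + 5);

        // &ValueStruct -> Vec<u8> via the Into impl: 10-byte header, then the value.
        let buf: Vec<u8> = (&v).into();
        assert_eq!(buf.len(), 15);
        assert_eq!(&buf[10..], b"hello");

        // Anything AsRef<[u8]> -> ValueStruct via the From impl.
        let decoded = ValueStruct::from(buf.as_slice());
        assert_eq!(decoded, v);
    }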
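
The `Xiterator` trait takes `&self` in `next`, so implementations need interior mutability for their cursor. A minimal in-memory implementation over a sorted `Vec` of key/value pairs, using a `Cell` for the cursor, shows one plausible convention (return the current element, then advance); it is an illustrative sketch, not one of the crate's real iterators:

    use std::cell::Cell;

    struct VecIter {
        items: Vec<(Vec<u8>, Vec<u8>)>, // assumed sorted by key
        cursor: Cell<usize>,
    }

    impl Xiterator for VecIter {
        type Output = (Vec<u8>, Vec<u8>);

        fn next(&self) -> Option<Self::Output> {
            let i = self.cursor.get();
            let item = self.items.get(i).cloned();
            if item.is_some() {
                self.cursor.set(i + 1);
            }
            item
        }

        fn rewind(&self) -> Option<Self::Output> {
            self.cursor.set(0);
            self.peek()
        }

        fn seek(&self, key: &[u8]) -> Option<Self::Output> {
            // Position on the first element whose key is >= the search key.
            let pos = self.items.partition_point(|(k, _)| k.as_slice() < key);
            self.cursor.set(pos);
            self.peek()
        }

        fn peek(&self) -> Option<Self::Output> {
            self.items.get(self.cursor.get()).cloned()
        }

        fn id(&self) -> String {
            "vec_iter".to_owned()
        }
    }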
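
A sketch of how `open_existing_synced_file` (O_DSYNC on the unix path) and the positional `read_at` helper combine; `read_page`, the path, and the 4 KiB buffer size are hypothetical, not part of the crate:

    #[cfg(any(target_os = "macos", target_os = "linux"))]
    fn read_page(path: &str, offset: u64) -> Result<Vec<u8>> {
        // Open with O_DSYNC so writes through this handle are durable.
        let fp = open_existing_synced_file(path, true)?;
        let mut buf = vec![0u8; 4096];
        // Positional read: does not move any shared file cursor.
        let n = read_at(&fp, &mut buf, offset)?;
        buf.truncate(n);
        Ok(buf)
    }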
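
In `parallel_load_block_key` above, the key bytes appear to be read back into the Header-sized `buffer` rather than into `out`, so the returned keys stay zeroed. A corrected sketch, under the same assumptions about `Header` and `read_at` and changing only the per-offset task, could look like this (a suggested fix, not the crate's current code):

    pub(crate) fn parallel_load_block_key(fp: File, offsets: Vec<u64>) -> Vec<Vec<u8>> {
        use crate::table::builder::Header;
        use std::sync::mpsc::sync_channel;
        use threads_pool::*;
        let (tx, rx) = sync_channel(offsets.len());
        let mut pool = ThreadPool::new(num_cpu());
        for (i, offset) in offsets.iter().enumerate() {
            let offset = *offset;
            let fp = fp.try_clone().unwrap();
            let tx = tx.clone();
            pool.execute(move || {
                // Read and decode the block's header first.
                let mut head_buf = vec![0u8; Header::size()];
                read_at(&fp, &mut head_buf, offset).unwrap();
                let head = Header::from(head_buf.as_slice());
                assert_eq!(head.p_len, 0, "key offset: {}, h.p_len = {}", offset, head.p_len);
                // Then read the key itself into its own k_len-sized buffer.
                let mut key = vec![0u8; head.k_len as usize];
                read_at(&fp, &mut key, offset + Header::size() as u64).unwrap();
                tx.send((i, key)).unwrap();
            })
            .unwrap();
        }
        pool.close();

        let mut keys = vec![vec![]; offsets.len()];
        for _ in 0..offsets.len() {
            let (i, key) = rx.recv().unwrap();
            keys[i] = key;
        }
        keys
    }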