├── rust-toolchain
├── clippy.toml
├── .gitignore
├── SECURITY.md
├── Cargo.toml
├── LICENSE
├── benches
│   └── merkle.rs
├── .github
│   └── workflows
│       └── ci.yml
├── README.md
├── src
│   ├── store
│   │   ├── vec.rs
│   │   ├── mmap.rs
│   │   ├── disk.rs
│   │   ├── mod.rs
│   │   └── level_cache.rs
│   ├── lib.rs
│   ├── hash.rs
│   ├── hash_impl.rs
│   └── proof.rs
└── tests
    ├── test_arities.rs
    ├── test_compound_constructors.rs
    ├── test_compound_compound_constructors.rs
    ├── common.rs
    └── test_base_constructors.rs

/rust-toolchain:
--------------------------------------------------------------------------------
1.71.1
--------------------------------------------------------------------------------
/clippy.toml:
--------------------------------------------------------------------------------
disallowed-names = [
    "unreadable_literal"
]
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
target
.DS_Store
**/*.rs.bk
Cargo.lock
.idea
**/*.iml
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
# Security Policy

## Reporting a Vulnerability

For reporting *critical* and *security* bugs, please consult our [Security Policy and Responsible Disclosure Program information](https://github.com/filecoin-project/community/blob/master/SECURITY.md).

## Reporting a non-security bug

For non-critical bugs, please simply file a GitHub issue on this repo.
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "merkletree"
version = "0.23.0"
authors = [
  "nemo",
  "schomatis",
  "Ivan Prisyazhnyy",
]

readme = "README.md"
description = "Light merkle tree implementation with SPV support, dependency agnostic."
license = "BSD-3-Clause"
homepage = "https://github.com/filecoin-project/merkle_light"
repository = "https://github.com/filecoin-project/merkle_light"
documentation = "https://docs.rs/merkletree"
keywords = ["merkle", "merkle-tree"]
categories = ["data-structures", "cryptography"]
edition = "2018"
rust-version = "1.66"

[package.metadata.release]
pre-release-commit-message = "chore(release): release {{version}}"

[dependencies]
rayon = "1.0.0"
memmap2 = "0.5.7"
arrayref = "0.3.5"
tempfile = "3.3"
positioned-io = "0.3"
log = "0.4.7"
serde = { version = "1.0", features = ["derive"] }
anyhow = "1.0.23"
typenum = "1.11.2"

[dev-dependencies]
byteorder = "1.3.1"
env_logger = "0.7.1"
sha2 = "0.10.2"
tempfile = "3.3"
rand = "0.7.3"
criterion = "0.3"
walkdir = "2.3.2"

[[bench]]
name = "merkle"
path = "benches/merkle.rs"
harness = false
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
BSD 3-Clause License

Copyright (c) 2016, Spin Research
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice, this
  list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright notice,
  this list of conditions and the following disclaimer in the documentation
  and/or other materials provided with the distribution.

* Neither the name of the copyright holder nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/benches/merkle.rs:
--------------------------------------------------------------------------------
use criterion::{black_box, criterion_group, criterion_main, Criterion};

use merkletree::merkle::{get_merkle_proof_lemma_len, get_merkle_tree_leafs, get_merkle_tree_len};

const DEFAULT_NUM_BRANCHES: usize = 2;

fn bench_get_merkle_tree_leafs(c: &mut Criterion) {
    for size in &[1, 256] {
        let sector_size = 1024 * 1024 * size;
        let tree_size = 2 * (sector_size / 32) - 1;
        c.bench_function(&format!("get merkle-tree leafs {size}mib"), |b| {
            b.iter(|| get_merkle_tree_leafs(black_box(tree_size), DEFAULT_NUM_BRANCHES)
                .expect("[bench_get_merkle_tree_leafs] couldn't compute number of leaves in Merkle Tree"));
        });
    }
}

fn bench_get_merkle_tree_info(c: &mut Criterion) {
    let branches = 8;
    let sector_size = 1073741824; // 2^30

    c.bench_function("get merkle-tree info 1gib", |b| {
        b.iter(|| {
            let tree_size = get_merkle_tree_len(black_box(sector_size), branches)
                .expect("[bench_get_merkle_tree_info] failed to get len");
            assert_eq!(
                get_merkle_tree_leafs(tree_size, branches).expect(
                    "[bench_get_merkle_tree_info] couldn't compute number of leaves in Merkle Tree"
                ),
                sector_size
            );
            get_merkle_proof_lemma_len(tree_size, branches)
        });
    });
}

criterion_group!(
    benches,
    bench_get_merkle_tree_leafs,
    bench_get_merkle_tree_info
);
criterion_main!(benches);
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
name: CI

on: [pull_request, push]

# Cancel a job if there's a new one started on the same branch.
# Based on https://stackoverflow.com/questions/58895283/stop-already-running-workflow-job-in-github-actions/67223051#67223051
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: true

env:
  CARGO_INCREMENTAL: 0
  RUST_BACKTRACE: 1
  # Faster crates.io index checkout.
  CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
  RUST_LOG: debug

jobs:
  check_clippy:
    runs-on: ubuntu-24.04
    name: Clippy
    steps:
      - uses: actions/checkout@v4
      - name: Run cargo clippy
        run: |
          rustup component add clippy
          cargo clippy --workspace --all-features -- -D warnings

  check_fmt:
    runs-on: ubuntu-24.04
    name: Checking fmt
    steps:
      - uses: actions/checkout@v4
      - name: Run cargo fmt
        run: |
          rustup component add rustfmt
          cargo fmt --all -- --check

  rustdoc:
    runs-on: ubuntu-24.04
    name: Rustdoc
    steps:
      - uses: actions/checkout@v4
      - name: Run rustdoc
        run: cargo rustdoc --all-features -- -D warnings

  build:
    runs-on: ubuntu-24.04
    name: Release build
    steps:
      - uses: actions/checkout@v4
      - name: Run cargo release build
        run: cargo build --release

  benches:
    runs-on: ubuntu-24.04
    name: Run benchmarks
    steps:
      - uses: actions/checkout@v4
      - name: Run benchmarks
        run: cargo bench --all-targets

  test:
    runs-on: ubuntu-24.04
    name: Run cargo test
    strategy:
      matrix:
        cargo-args: ['--workspace --release', '--release -- --ignored']
    steps:
      - uses: actions/checkout@v4
      - name: Test
        run: cargo test ${{ matrix.cargo-args }}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# merkle

[![Build Status](https://travis-ci.com/filecoin-project/merkle_light.svg?branch=master&style=flat)](https://travis-ci.com/filecoin-project/merkle_light)
[![Issues](http://img.shields.io/github/issues/filecoin-project/merkle_light.svg?style=flat)](https://github.com/filecoin-project/merkle_light/issues)
![License](https://img.shields.io/badge/license-bsd3-brightgreen.svg?style=flat)

*merkle* is a lightweight Rust implementation of a [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree).

## Features

- external dependency agnostic
- `std::hash::Hasher` compatibility
- standard types hasher implementations
- `#[derive(Hashable)]` support for simple structs
- customizable merkle leaf/node hashing algorithm
- support for custom hash types (e.g. `[u8; 16]`, `[u64; 4]`, `[u128; 2]`, structs)
- customizable hashing algorithm
- linear memory layout, no nodes on heap
- buildable from iterator, objects or hashes
- certificate transparency style merkle hashing support
- SPV support included (via proof type)
- supports power-of-2 arity merkle trees (only)
- supports compound merkle trees (a tree of merkle trees)
- supports compound-compound merkle trees (a tree of compound merkle trees)

## Documentation

Documentation is [available](https://docs.rs/merkletree).
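
## Quick example

A minimal sketch of the core flow (build a tree, read its root, generate and validate an inclusion proof). `Item` and `MyAlgorithm` here are hypothetical stand-ins for your own `Element` and `Algorithm<Item>` implementations, such as the XOR128 helpers in `test_common.rs`:

```rust
use merkletree::merkle::MerkleTree;
use merkletree::store::VecStore;
use typenum::U2;

// `Item: Element` and `MyAlgorithm: Algorithm<Item>` are assumed to be
// defined elsewhere; see `test_common.rs` for concrete implementations.
fn demo(leaves: Vec<Item>) {
    // Build a binary (arity 2) tree backed by an in-memory store.
    let tree: MerkleTree<Item, MyAlgorithm, VecStore<Item>, U2> =
        MerkleTree::new(leaves).expect("failed to build tree");

    // Root hash of the tree.
    let _root = tree.root();

    // Generate an inclusion proof for leaf 0 and validate it.
    let proof = tree.gen_proof(0).expect("failed to generate proof");
    assert!(proof
        .validate::<MyAlgorithm>()
        .expect("failed to validate proof"));
}
```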

## Examples

The most relevant examples are located in the following files:

* `test_common.rs`: custom hash example xor128, misc shared utils
* `test_xor128.rs`: most comprehensive tests for library features
* `proof.rs`: contains impl and tests for proofs across pow2 arity trees

## Building and testing

```
# Run tests in release mode
cargo test --release --all

# Run ignored tests in release mode
cargo test --release --all -- --ignored
```

## Bug Reporting

Please report bugs either as pull requests or as issues in [the issue
tracker](https://github.com/filecoin-project/merkle_light). *merkle* has a
**full disclosure** vulnerability policy. **Please do NOT attempt to report
any security vulnerability in this code privately to anybody.**

## License

See [LICENSE](LICENSE).
--------------------------------------------------------------------------------
/src/store/vec.rs:
--------------------------------------------------------------------------------
use std::ops::{self, Index};

use anyhow::Result;

use crate::merkle::Element;
use crate::store::{Store, StoreConfig};

#[derive(Debug, Clone, Default)]
pub struct VecStore<E: Element>(Vec<E>);

impl<E: Element> ops::Deref for VecStore<E> {
    type Target = [E];

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<E: Element> Store<E> for VecStore<E> {
    fn new_with_config(size: usize, _branches: usize, _config: StoreConfig) -> Result<Self> {
        Self::new(size)
    }

    fn new(size: usize) -> Result<Self> {
        Ok(VecStore(Vec::with_capacity(size)))
    }

    fn write_at(&mut self, el: E, index: usize) -> Result<()> {
        if self.0.len() <= index {
            self.0.resize(index + 1, E::default());
        }

        self.0[index] = el;
        Ok(())
    }

    // NOTE: Performance regression. To conform with the current API we are
    // unnecessarily converting to and from `&[u8]` in the `VecStore` which
    // already stores `E` (in contrast with the `mmap` versions). We are
    // prioritizing performance for the `mmap` case which will be used in
    // production (`VecStore` is mainly for testing and backwards compatibility).
    fn copy_from_slice(&mut self, buf: &[u8], start: usize) -> Result<()> {
        ensure!(
            buf.len() % E::byte_len() == 0,
            "buf size must be a multiple of {}",
            E::byte_len()
        );
        let num_elem = buf.len() / E::byte_len();

        if self.0.len() < start + num_elem {
            self.0.resize(start + num_elem, E::default());
        }

        self.0.splice(
            start..start + num_elem,
            buf.chunks_exact(E::byte_len()).map(E::from_slice),
        );
        Ok(())
    }

    fn new_from_slice_with_config(
        size: usize,
        _branches: usize,
        data: &[u8],
        _config: StoreConfig,
    ) -> Result<Self> {
        Self::new_from_slice(size, data)
    }

    fn new_from_slice(size: usize, data: &[u8]) -> Result<Self> {
        let mut v: Vec<_> = data
            .chunks_exact(E::byte_len())
            .map(E::from_slice)
            .collect();
        let additional = size - v.len();
        v.reserve(additional);

        Ok(VecStore(v))
    }

    fn new_from_disk(_size: usize, _branches: usize, _config: &StoreConfig) -> Result<Self> {
        unimplemented!("Cannot load a VecStore from disk");
    }

    fn read_at(&self, index: usize) -> Result<E> {
        Ok(self.0[index].clone())
    }

    fn read_into(&self, index: usize, buf: &mut [u8]) -> Result<()> {
        self.0[index].copy_to_slice(buf);
        Ok(())
    }

    fn read_range_into(&self, _start: usize, _end: usize, _buf: &mut [u8]) -> Result<()> {
        unimplemented!("Not required here");
    }

    fn read_range(&self, r: ops::Range<usize>) -> Result<Vec<E>> {
        Ok(self.0.index(r).to_vec())
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    fn loaded_from_disk(&self) -> bool {
        false
    }

    fn compact(
        &mut self,
        _branches: usize,
        _config: StoreConfig,
        _store_version: u32,
    ) -> Result<bool> {
        self.0.shrink_to_fit();

        Ok(true)
    }

    fn delete(_config: StoreConfig) -> Result<()> {
        Ok(())
    }

    fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    fn push(&mut self, el: E) -> Result<()> {
        self.0.push(el);
        Ok(())
    }
}
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
//! Light _Merkle Tree_ implementation.
//!
//! A Merkle tree (MT) implemented as a full (power of 2) arity tree, allocated as a vec
//! of statically sized hashes to give hashes more locality (although disk-based backings,
//! including partial-tree disk backings, are also supported). The MT is specialized
//! by arity, hashing algorithm and hash item. The [`crate::hash::Hashable`] trait is
//! compatible with `std::hash::Hasher` and supports custom hash algorithms.
//! The implementation does not depend on any external crypto libraries, and tries
//! to be as performant as possible (CPU support only; GPU hashing is currently unsupported).
//!
//! By default, this tree implementation uses the encoding scheme from _Certificate
//! Transparency_. The encoding scheme for leaves and nodes can be overridden, though.
//! [RFC 6962](https://tools.ietf.org/html/rfc6962):
//!
//! ```text
//! MTH({d(0)}) = ALG(0x00 || d(0)).
//! For n > 1, let k be the largest power of two smaller than n (i.e.,
//! k < n <= 2k). The Merkle tree Hash of an n-element list D[n] is then
//! defined recursively as
//!     MTH(D[n]) = ALG(0x01 || MTH(D[0:k]) || MTH(D[k:n])),
//! ```
//!
//! Link: <https://en.wikipedia.org/wiki/Merkle_tree>
//!
//! # Implementation choices
//!
//! The main idea is that the whole code must obtain specialization at compile time with
//! a minimum of allocation calls, hashes must be fixed-size arrays known at
//! compile time, the hash algorithm must be a trait and must not depend on any
//! external cryptographic libraries, and the lib itself must somehow mimic the std Rust API.
//!
//! The standard way in Rust to hash objects is with a `std::hash::Hasher`, and mainly
//! that is the reason behind the choice of the abstractions:
//!
//! `Object : Hashable -> Hasher + Algorithm <- Merkle Tree`
//!
//! The custom [`crate::hash::Hashable`] trait allows implementations to differ from the
//! [`std::collections`]-related hashes, allows different implementations for
//! different hashing algorithms / schemas, and conforms to object-safety trait rules.
//!
//! [`crate::hash::Algorithm`] complements [`std::hash::Hasher`] to be reusable and follows the idea
//! that the resulting hash is a mapping of the data stream.
//!
//! [`Algorithm.hash`] had to change its signature to be `&mut self` (`&self`) because
//! most cryptographic digest algorithms break their current state on finalization,
//! leaving it unusable. The `ring` library, though, contains interfaces incompatible with the
//! `start-update-finish-reset` lifecycle. It requires either `cloning()` its state
//! on finalization, or `Cell`-ing via unsafe.
//!
//! Turning back to having [`Algorithm.write(&mut self, &[u8])`] instead of
//! `write(T)` allows us to relax the [`std::hash::Hasher`] constraint on the
//! [`crate::hash::Algorithm`] trait, even though the two still work well together.
//!
//! # Interface
//!
//! ```text
//! - build_tree (items) -> tree
//! - get_root -> hash
//! - gen_proof -> proof
//! - validate_proof (proof, leaf, root) -> bool
//! ```
//!
//! # Examples
//!
//! [`test_common.rs`]: custom hash example xor128, misc shared utils
//! [`test_xor128.rs`]: most comprehensive tests for library features
//! [`proof.rs`]: contains impl and tests for proofs across pow2 arity trees
//!

// missing_docs,
#![deny(
    unused_qualifications,
    missing_debug_implementations,
    missing_copy_implementations,
    trivial_casts,
    trivial_numeric_casts,
    unsafe_code,
    unstable_features,
    unused_import_braces
)]
#![cfg_attr(feature = "nightly", allow(unstable_features))]

#[macro_use]
extern crate anyhow;

/// Hash infrastructure for items in Merkle tree.
pub mod hash;

/// Common implementations for [`crate::hash::Hashable`].
mod hash_impl;

/// Store implementations.
pub mod store;

/// Merkle tree inclusion proof.
pub mod proof;

/// Merkle tree abstractions, implementation and algorithms.
pub mod merkle;

/// Tests XOR128.
mod test_legacy;

#[macro_use]
extern crate arrayref;
--------------------------------------------------------------------------------
/src/hash.rs:
--------------------------------------------------------------------------------
//! Hash infrastructure for items in Merkle Tree.

use std::hash::Hasher;

/// A hashable type.
///
/// Types implementing `Hashable` are able to be [`Hashable::hash`]ed with an instance of
/// [`Hasher`].
///
/// ## Implementing `Hashable`
///
/// You can derive `Hashable` with `#[derive(Hashable)]` if all fields implement `Hashable`.
/// The resulting hash will be the combination of the values from calling
/// [`Hashable::hash`] on each field.
///
/// ```text
/// #[macro_use]
/// extern crate merkletree_derive;
/// extern crate merkletree;
///
/// use merkletree::hash::Hashable;
///
/// fn main() {
///     #[derive(Hashable)]
///     struct Foo {
///         name: String,
///         country: String,
///     }
/// }
/// ```
///
/// If you need more control over how a value is hashed, you can of course
/// implement the `Hashable` trait yourself:
///
/// ```
/// extern crate merkletree;
///
/// use merkletree::hash::Hashable;
/// use std::hash::Hasher;
/// use std::collections::hash_map::DefaultHasher;
///
/// fn main() {
///     struct Person {
///         id: u32,
///         name: String,
///         phone: u64,
///     }
///
///     impl<H: Hasher> Hashable<H> for Person {
///         fn hash(&self, state: &mut H) {
///             self.id.hash(state);
///             self.name.hash(state);
///             self.phone.hash(state);
///         }
///     }
///
///     let foo = Person {
///         id: 1,
///         name: String::from("blah"),
///         phone: 2,
///     };
///
///     let mut hr = DefaultHasher::new();
///     foo.hash(&mut hr);
///     assert_eq!(hr.finish(), 7101638158313343130)
/// }
/// ```
///
/// ## `Hashable` and `Eq`
///
/// When implementing both `Hashable` and [`Eq`], it is important that the following
/// property holds:
///
/// ```text
/// k1 == k2 -> hash(k1) == hash(k2)
/// ```
///
/// In other words, if two keys are equal, their hashes must also be equal.
pub trait Hashable<H: Hasher> {
    /// Feeds this value into the given [`Hasher`].
    ///
    /// [`Hasher`]: trait.Hasher.html
    fn hash(&self, state: &mut H);

    /// Feeds a slice of this type into the given [`Hasher`].
    ///
    /// [`Hasher`]: trait.Hasher.html
    fn hash_slice(data: &[Self], state: &mut H)
    where
        Self: Sized,
    {
        for piece in data {
            piece.hash(state);
        }
    }
}

/// MT leaf hash prefix
const LEAF: u8 = 0x00;

/// MT interior node hash prefix
const INTERIOR: u8 = 0x01;

/// A trait for hashing an arbitrary stream of bytes for calculating merkle tree
/// nodes.
///
/// T is a hash item that must be of known size at compile time, globally ordered, with
/// a default value that is a neutral element of the hash space. The neutral element is
/// interpreted as 0 or nil and is required for evaluation of the merkle tree.
///
/// [`Algorithm`] breaks the [`Hasher`] contract at `finish()`, but that is intended.
/// This trait extends [`Hasher`] with `hash -> T` and `reset` state methods,
/// plus implements default behavior of evaluation of MT interior nodes.
pub trait Algorithm<T>: Hasher + Default
where
    T: Clone + AsRef<[u8]>,
{
    /// Returns the hash value for the data stream written so far.
    fn hash(&mut self) -> T;

    /// Reset Hasher state.
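    ///
    /// The default implementation simply replaces `self` with a fresh
    /// `Self::default()`; this suits digest types whose internal state
    /// cannot be reused after finalization (see the module-level docs
    /// in `lib.rs`).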
    #[inline]
    fn reset(&mut self) {
        *self = Self::default();
    }

    /// Returns hash value for MT leaf (prefix 0x00).
    #[inline]
    fn leaf(&mut self, leaf: T) -> T {
        self.write(&[LEAF]);
        self.write(leaf.as_ref());
        self.hash()
    }

    /// Returns hash value for MT interior node (prefix 0x01).
    #[inline]
    fn node(&mut self, left: T, right: T, _height: usize) -> T {
        self.write(&[INTERIOR]);
        self.write(left.as_ref());
        self.write(right.as_ref());
        self.hash()
    }

    /// Returns hash value for MT interior node (prefix 0x01).
    #[inline]
    fn multi_node(&mut self, nodes: &[T], _height: usize) -> T {
        self.write(&[INTERIOR]);
        for node in nodes {
            self.write(node.as_ref());
        }
        self.hash()
    }
}
--------------------------------------------------------------------------------
/src/hash_impl.rs:
--------------------------------------------------------------------------------
use std::hash::Hasher;
use std::mem;
use std::slice;

use crate::hash::Hashable;

macro_rules! impl_write {
    ($(($ty:ident, $meth:ident),)*) => {$(
        impl<H: Hasher> Hashable<H> for $ty {
            fn hash(&self, state: &mut H) {
                state.$meth(*self)
            }

            #[allow(trivial_casts, unsafe_code)]
            fn hash_slice(data: &[$ty], state: &mut H) {
                let newlen = data.len() * mem::size_of::<$ty>();
                let ptr = data.as_ptr() as *const u8;
                state.write(unsafe { slice::from_raw_parts(ptr, newlen) })
            }
        }
    )*}
}

impl_write! {
    (u8, write_u8),
    (u16, write_u16),
    (u32, write_u32),
    (u64, write_u64),
    (usize, write_usize),
    (i8, write_i8),
    (i16, write_i16),
    (i32, write_i32),
    (i64, write_i64),
    (isize, write_isize),
    (u128, write_u128),
    (i128, write_i128),
}

impl<H: Hasher, const N: usize> Hashable<H> for [u8; N] {
    fn hash(&self, state: &mut H) {
        state.write(self.as_ref())
    }

    #[allow(trivial_casts, unsafe_code)]
    fn hash_slice(data: &[[u8; N]], state: &mut H) {
        let newlen = mem::size_of_val(data);
        let ptr = data.as_ptr() as *const u8;
        state.write(unsafe { slice::from_raw_parts(ptr, newlen) })
    }
}

impl<H: Hasher> Hashable<H> for Vec<u8> {
    fn hash(&self, state: &mut H) {
        state.write(self.as_ref());
    }
}

impl<H: Hasher> Hashable<H> for bool {
    fn hash(&self, state: &mut H) {
        state.write_u8(*self as u8)
    }
}

impl<H: Hasher> Hashable<H> for char {
    fn hash(&self, state: &mut H) {
        state.write_u32(*self as u32)
    }
}

impl<H: Hasher> Hashable<H> for str {
    fn hash(&self, state: &mut H) {
        state.write(self.as_bytes());
        // empty str nope: state.write_u8(0xff)
    }
}

impl<H: Hasher> Hashable<H> for String {
    fn hash(&self, state: &mut H) {
        state.write(self.as_bytes());
        // empty str nope: state.write_u8(0xff)
    }
}

macro_rules! impl_hash_tuple {
    () => (
        impl<H: Hasher> Hashable<H> for () {
            fn hash(&self, _: &mut H) {}
        }
    );

    ( $($name:ident)+) => (
        impl<Z: Hasher, $($name: Hashable<Z>),*> Hashable<Z> for ($($name,)*)
        where
            last_type!($($name,)+): ?Sized
        {
            #[allow(non_snake_case)]
            fn hash(&self, state: &mut Z) {
                let ($(ref $name,)*) = *self;
                $($name.hash(state);)*
            }
        }
    );
}

macro_rules! last_type {
    ($a:ident,) => { $a };
    ($a:ident, $($rest_a:ident,)+) => { last_type!($($rest_a,)+) };
}

impl_hash_tuple! {}
impl_hash_tuple! { A }
impl_hash_tuple! { A B }
impl_hash_tuple! { A B C }
impl_hash_tuple! { A B C D }
impl_hash_tuple! { A B C D E }
impl_hash_tuple! { A B C D E F }
impl_hash_tuple! { A B C D E F G }
impl_hash_tuple! { A B C D E F G H }
impl_hash_tuple! { A B C D E F G H I }
impl_hash_tuple! { A B C D E F G H I J }
impl_hash_tuple! { A B C D E F G H I J K }
impl_hash_tuple! { A B C D E F G H I J K L }

impl<H: Hasher, T: Hashable<H>> Hashable<H> for [T] {
    fn hash(&self, state: &mut H) {
        Hashable::hash_slice(self, state)
    }
}

impl<'a, H: Hasher, T: ?Sized + Hashable<H>> Hashable<H> for &'a T {
    fn hash(&self, state: &mut H) {
        (**self).hash(state);
    }
}

impl<'a, H: Hasher, T: ?Sized + Hashable<H>> Hashable<H> for &'a mut T {
    fn hash(&self, state: &mut H) {
        (**self).hash(state);
    }
}

impl<H: Hasher, T: ?Sized> Hashable<H> for *const T {
    #[allow(trivial_casts, unsafe_code)]
    fn hash(&self, state: &mut H) {
        if mem::size_of::<Self>() == mem::size_of::<usize>() {
            // Thin pointer
            state.write_usize(*self as *const () as usize);
        } else {
            // Fat pointer
            let (a, b) = unsafe { *(self as *const Self as *const (usize, usize)) };
            state.write_usize(a);
            state.write_usize(b);
        }
    }
}

impl<H: Hasher, T: ?Sized> Hashable<H> for *mut T {
    #[allow(trivial_casts, unsafe_code)]
    fn hash(&self, state: &mut H) {
        if mem::size_of::<Self>() == mem::size_of::<usize>() {
            // Thin pointer
            state.write_usize(*self as *const () as usize);
        } else {
            // Fat pointer
            let (a, b) = unsafe { *(self as *const Self as *const (usize, usize)) };
            state.write_usize(a);
            state.write_usize(b);
        }
    }
}
--------------------------------------------------------------------------------
/tests/test_arities.rs:
--------------------------------------------------------------------------------
#![cfg(not(tarpaulin_include))]
pub mod common;

use typenum::{Unsigned, U0, U2, U3, U4, U5, U8};

use merkletree::merkle::{get_merkle_tree_len_generic, Element, MerkleTree};
use merkletree::store::VecStore;

use crate::common::{
    get_vector_of_base_trees, instantiate_new, test_disk_mmap_vec_tree_functionality, TestItemType,
    TestSha256Hasher,
};

#[test]
fn test_base_tree_arities() {
    fn run_test<BaseTreeArity: Unsigned>(leaves: usize, root: TestItemType) {
        let expected_leaves = leaves;
        let len = get_merkle_tree_len_generic::<BaseTreeArity, U0, U0>(leaves)
            .expect("[test_base_tree_arities] couldn't compute Merkle Tree len");

        let tree = instantiate_new::<
            TestItemType,
            TestSha256Hasher,
            VecStore<TestItemType>,
            BaseTreeArity,
        >(leaves, None);
        test_disk_mmap_vec_tree_functionality::<
            TestItemType,
            TestSha256Hasher,
            VecStore<TestItemType>,
            BaseTreeArity,
            U0,
            U0,
        >(tree, expected_leaves, len, root);
    }

    let root = TestItemType::from_slice(&[
        142, 226, 200, 91, 184, 251, 142, 223, 219, 43, 122, 241, 23, 37, 97, 46,
    ]);
    run_test::(4, root);

    let root = TestItemType::from_slice(&[
        128, 59, 187, 58, 199, 144, 7, 238, 128, 146, 124, 33, 241, 16, 92, 221,
    ]);
    run_test::(16, root);

    let root = TestItemType::from_slice(&[
        252, 61, 163, 229, 140, 223,
        198, 165, 200, 137, 59, 43, 83, 136, 197, 63,
    ]);
    run_test::(64, root);
}

#[test]
fn test_compound_tree_arities() {
    fn run_test<BaseTreeArity: Unsigned, SubTreeArity: Unsigned>(
        leaves: usize,
        root: TestItemType,
    ) {
        let expected_leaves = leaves * SubTreeArity::to_usize();
        let len = get_merkle_tree_len_generic::<BaseTreeArity, SubTreeArity, U0>(leaves)
            .expect("[test_compound_tree_arities] couldn't compute Merkle Tree len");

        let tree = MerkleTree::<
            TestItemType,
            TestSha256Hasher,
            VecStore<TestItemType>,
            BaseTreeArity,
            SubTreeArity,
        >::from_trees(get_vector_of_base_trees::<
            TestItemType,
            TestSha256Hasher,
            VecStore<TestItemType>,
            BaseTreeArity,
            SubTreeArity,
        >(leaves))
        .expect("can't instantiate compound tree [test_compound_tree_arities]");

        test_disk_mmap_vec_tree_functionality::<
            TestItemType,
            TestSha256Hasher,
            VecStore<TestItemType>,
            BaseTreeArity,
            SubTreeArity,
            U0,
        >(tree, expected_leaves, len, root);
    }

    let root = TestItemType::from_slice(&[
        57, 201, 227, 235, 242, 179, 108, 46, 157, 200, 126, 217, 134, 232, 141, 223,
    ]);
    run_test::(4, root);

    let root = TestItemType::from_slice(&[
        146, 59, 189, 83, 119, 102, 147, 207, 178, 121, 11, 190, 241, 152, 67, 0,
    ]);
    run_test::(16, root);

    let root = TestItemType::from_slice(&[
        32, 129, 168, 134, 58, 233, 155, 225, 88, 230, 247, 63, 18, 38, 194, 230,
    ]);
    run_test::(64, root);

    let root = TestItemType::from_slice(&[
        81, 96, 135, 96, 165, 113, 149, 203, 222, 86, 102, 127, 139, 194, 78, 22,
    ]);
    run_test::(4, root);

    let root = TestItemType::from_slice(&[
        149, 57, 53, 8, 68, 184, 94, 209, 244, 218, 43, 172, 185, 215, 193, 99,
    ]);
    run_test::(64, root);

    let root = TestItemType::from_slice(&[
        127, 19, 226, 22, 109, 131, 88, 30, 221, 228, 251, 183, 147, 248, 2, 186,
    ]);
    run_test::(4, root);

    let root = TestItemType::from_slice(&[
        67, 94, 188, 238, 85, 194, 96, 252, 163, 54, 119, 99, 218, 210, 231, 190,
    ]);
    run_test::(16, root);
}

#[test]
fn test_compound_compound_tree_arities() {
    fn run_test<BaseTreeArity: Unsigned, SubTreeArity: Unsigned, TopTreeArity: Unsigned>(
        leaves: usize,
        root: TestItemType,
    ) {
        let expected_leaves = leaves * SubTreeArity::to_usize() * TopTreeArity::to_usize();
        let len = get_merkle_tree_len_generic::<BaseTreeArity, SubTreeArity, TopTreeArity>(leaves)
            .expect("[test_compound_compound_tree_arities] couldn't compute Merkle Tree len");

        let base_trees = (0..TopTreeArity::to_usize())
            .flat_map(|_| {
                get_vector_of_base_trees::<
                    TestItemType,
                    TestSha256Hasher,
                    VecStore<TestItemType>,
                    BaseTreeArity,
                    SubTreeArity,
                >(leaves)
            })
            .collect::<Vec<_>>();

        let tree = MerkleTree::from_sub_trees_as_trees(base_trees).expect(
            "can't instantiate compound-compound tree [test_compound_compound_tree_arities]",
        );

        test_disk_mmap_vec_tree_functionality::<
            TestItemType,
            TestSha256Hasher,
            VecStore<TestItemType>,
            BaseTreeArity,
            SubTreeArity,
            TopTreeArity,
        >(tree, expected_leaves, len, root);
    }

    let root = TestItemType::from_slice(&[
        77, 96, 160, 26, 181, 161, 25, 63, 24, 181, 60, 43, 45, 20, 246, 181,
    ]);
    run_test::(4, root);

    let root = TestItemType::from_slice(&[
        52, 152, 123, 224, 174, 42, 152, 12, 199, 4, 105, 245, 176, 59, 230, 86,
    ]);
    run_test::(64, root);

    // TODO: investigate whether the limitations of the 'from_sub_trees_as_trees' constructor are reasonable
    // run_test::(4, root);
    // run_test::(4, root);
    // run_test::(64, root);
    // run_test::(64, root);
    // run_test::(64, root);
    // run_test::(4, root);
    // etc...
}
--------------------------------------------------------------------------------
/src/store/mmap.rs:
--------------------------------------------------------------------------------
use std::fs::{File, OpenOptions};
use std::marker::PhantomData;
use std::ops;
use std::path::Path;

use anyhow::{Context, Result};
use log::error;
use memmap2::MmapMut;

use crate::merkle::Element;
use crate::store::{Store, StoreConfig};

/// Store that saves the data on disk, and accesses it using memmap.
#[derive(Debug)]
pub struct MmapStore<E: Element> {
    map: Option<MmapMut>,
    file: File,
    len: usize,
    _e: PhantomData<E>,
}

impl<E: Element> ops::Deref for MmapStore<E> {
    type Target = [u8];

    fn deref(&self) -> &Self::Target {
        &self.map.as_ref().expect("couldn't dereference")[..]
    }
}

impl<E: Element> Store<E> for MmapStore<E> {
    #[allow(unsafe_code)]
    fn new_with_config(size: usize, branches: usize, config: StoreConfig) -> Result<Self> {
        let data_path = StoreConfig::data_path(&config.path, &config.id);

        // If the specified file exists, load it from disk.
        if Path::new(&data_path).exists() {
            return Self::new_from_disk(size, branches, &config);
        }

        // Otherwise, create the file and allow it to be the on-disk store.
        let file = OpenOptions::new()
            .write(true)
            .read(true)
            .create_new(true)
            .open(&data_path)?;

        let store_size = E::byte_len() * size;
        file.set_len(store_size as u64)?;

        let map = unsafe { MmapMut::map_mut(&file)? };

        Ok(MmapStore {
            map: Some(map),
            file,
            len: 0,
            _e: Default::default(),
        })
    }

    #[allow(unsafe_code)]
    fn new(size: usize) -> Result<Self> {
        let store_size = E::byte_len() * size;

        let file = tempfile::NamedTempFile::new()?;
        file.as_file().set_len(store_size as u64)?;
        let (file, _path) = file.into_parts();
        let map = unsafe { MmapMut::map_mut(&file)? };

        Ok(MmapStore {
            map: Some(map),
            file,
            len: 0,
            _e: Default::default(),
        })
    }

    #[allow(unsafe_code)]
    fn new_from_disk(size: usize, _branches: usize, config: &StoreConfig) -> Result<Self> {
        let data_path = StoreConfig::data_path(&config.path, &config.id);
        ensure!(Path::new(&data_path).exists(), "[MmapStore] new_from_disk constructor can be used only for instantiating already existing storages");

        // MmapStore expects a read/write file.
        let file = match OpenOptions::new().write(true).read(true).open(&data_path) {
            Ok(file) => file,
            Err(e) => {
                if e.kind() == std::io::ErrorKind::PermissionDenied {
                    error!("MmapStore doesn't support read-only storages");
                }
                panic!("{}", e)
            }
        };

        let metadata = file.metadata()?;
        let store_size = metadata.len() as usize;

        // Sanity check.
        ensure!(
            store_size == size * E::byte_len(),
            "Invalid formatted file provided. Expected {} bytes, found {} bytes",
            size * E::byte_len(),
            store_size
        );

        let map = unsafe { MmapMut::map_mut(&file)? };

        Ok(MmapStore {
            map: Some(map),
            file,
            len: size,
            _e: Default::default(),
        })
    }

    fn write_at(&mut self, el: E, index: usize) -> Result<()> {
        let start = index * E::byte_len();
        let end = start + E::byte_len();

        if self.map.is_none() {
            self.reinit()?;
        }

        // unwrap is safe as we checked map to be initialised
        self.map.as_mut().unwrap()[start..end].copy_from_slice(el.as_ref());
        self.len = std::cmp::max(self.len, index + 1);

        Ok(())
    }

    fn copy_from_slice(&mut self, buf: &[u8], start: usize) -> Result<()> {
        ensure!(
            buf.len() % E::byte_len() == 0,
            "buf size must be a multiple of {}",
            E::byte_len()
        );

        let map_start = start * E::byte_len();
        let map_end = map_start + buf.len();

        if self.map.is_none() {
            self.reinit()?;
        }

        // unwrap is safe as we checked map to be initialised
        self.map.as_mut().unwrap()[map_start..map_end].copy_from_slice(buf);
        self.len = std::cmp::max(self.len, start + (buf.len() / E::byte_len()));

        Ok(())
    }

    fn new_from_slice_with_config(
        size: usize,
        branches: usize,
        data: &[u8],
        config: StoreConfig,
    ) -> Result<Self> {
        ensure!(
            data.len() % E::byte_len() == 0,
            "data size must be a multiple of {}",
            E::byte_len()
        );

        let mut store = Self::new_with_config(size, branches, config)?;

        // If the store was loaded from disk (based on the config
        // information), avoid re-populating the store at this point
        // since it can be assumed by the config that the data is
        // already correct.
        if !store.loaded_from_disk() {
            if store.map.is_none() {
                store.reinit()?;
            }

            let len = data.len();

            // unwrap is safe as we checked map to be initialised
            store.map.as_mut().unwrap()[0..len].copy_from_slice(data);
            store.len = len / E::byte_len();
        }

        Ok(store)
    }

    fn new_from_slice(size: usize, data: &[u8]) -> Result<Self> {
        ensure!(
            data.len() % E::byte_len() == 0,
            "data size must be a multiple of {}",
            E::byte_len()
        );

        let mut store = Self::new(size)?;
        ensure!(store.map.is_some(), "Internal map needs to be initialized");

        let len = data.len();
        // unwrap is safe as we checked map to be initialised
        store.map.as_mut().unwrap()[0..len].copy_from_slice(data);
        store.len = len / E::byte_len();

        Ok(store)
    }

    fn read_at(&self, index: usize) -> Result<E> {
        let start = index * E::byte_len();
        let end = start + E::byte_len();
        let len = self.len * E::byte_len();

        ensure!(start < len, "start out of range {} >= {}", start, len);
        ensure!(end <= len, "end out of range {} > {}", end, len);

        let data = self
            .map
            .as_ref()
            .context("Internal map needs to be initialized")?;
        Ok(E::from_slice(&data[start..end]))
    }

    fn read_into(&self, index: usize, buf: &mut [u8]) -> Result<()> {
        let start = index * E::byte_len();
        let end = start + E::byte_len();
        let len = self.len * E::byte_len();

        ensure!(start < len, "start out of range {} >= {}", start, len);
        ensure!(end <= len, "end out of range {} > {}", end, len);

        let data = self
            .map
            .as_ref()
            .context("Internal map needs to be initialized")?;
        buf.copy_from_slice(&data[start..end]);

        Ok(())
    }

    fn read_range_into(&self, _start: usize, _end: usize, _buf: &mut [u8]) -> Result<()> {
        unimplemented!("Not required here");
    }

    fn read_range(&self, r: ops::Range<usize>) -> Result<Vec<E>> {
        let start = r.start * E::byte_len();
        let end = r.end * E::byte_len();
        let len = self.len * E::byte_len();

        ensure!(start < len, "start out of range {} >= {}", start, len);
        ensure!(end <= len, "end out of range {} > {}", end, len);

        let data = self
            .map
            .as_ref()
            .context("Internal map needs to be initialized")?;
        Ok(data[start..end]
            .chunks(E::byte_len())
            .map(E::from_slice)
            .collect())
    }

    fn len(&self) -> usize {
        self.len
    }

    fn loaded_from_disk(&self) -> bool {
        false
    }

    fn compact(
        &mut self,
        _branches: usize,
        _config: StoreConfig,
        _store_version: u32,
    ) -> Result<bool> {
        let map = self.map.take();

        Ok(map.is_some())
    }

    #[allow(unsafe_code)]
    fn reinit(&mut self) -> Result<()> {
        self.map = unsafe { Some(MmapMut::map_mut(&self.file)?) };
        ensure!(self.map.is_some(), "Re-init mapping failed");

        Ok(())
    }

    fn delete(_config: StoreConfig) -> Result<()> {
        Ok(())
    }

    fn is_empty(&self) -> bool {
        self.len == 0
    }

    fn push(&mut self, el: E) -> Result<()> {
        let l = self.len;

        if self.map.is_none() {
            self.reinit()?;
        }

        ensure!(
            // unwrap is safe as we checked map to be initialised
            (l + 1) * E::byte_len() <= self.map.as_ref().unwrap().len(),
            "not enough space"
        );

        self.write_at(el, l)
    }
}
--------------------------------------------------------------------------------
/tests/test_compound_constructors.rs:
--------------------------------------------------------------------------------
#![cfg(not(tarpaulin_include))]
pub mod common;

use typenum::{Unsigned, U0, U8};

use merkletree::hash::Algorithm;
use merkletree::merkle::{
    get_merkle_tree_len_generic, get_merkle_tree_row_count, Element, MerkleTree,
};
use merkletree::store::{DiskStore, MmapStore, Store, StoreConfig, VecStore};

use common::{
    get_vector_of_base_trees, get_vector_of_base_trees_as_slices, instantiate_new_with_config,
    serialize_tree, test_disk_mmap_vec_tree_functionality, TestItem, TestItemType,
    TestSha256Hasher, TestXOR128,
};

/// Compound tree constructors
fn instantiate_ctree_from_trees<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity> {
    let base_trees =
        get_vector_of_base_trees::<E, A, S, BaseTreeArity, SubTreeArity>(base_tree_leaves);
    MerkleTree::from_trees(base_trees).expect("failed to instantiate compound tree [from_trees]")
}

fn instantiate_ctree_from_stores<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity> {
    let base_trees =
        get_vector_of_base_trees::<E, A, S, BaseTreeArity, SubTreeArity>(base_tree_leaves);
    let mut stores = Vec::new();
    for tree in base_trees {
        let serialized_tree = serialize_tree(tree);
        stores.push(
            S::new_from_slice(serialized_tree.len(), &serialized_tree)
                .expect("can't create new store over existing one [from_stores]"),
        );
    }

    MerkleTree::from_stores(base_tree_leaves, stores)
        .expect("failed to instantiate compound tree [from_stores]")
}

fn instantiate_ctree_from_slices<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity> {
    let base_trees = get_vector_of_base_trees_as_slices::<E, A, S, BaseTreeArity, SubTreeArity>(
        base_tree_leaves,
    );
    let vec_of_slices: Vec<&[u8]> = base_trees.iter().map(|x| &x[..]).collect();

    MerkleTree::<E, A, S, BaseTreeArity, SubTreeArity>::from_slices(
        &vec_of_slices[..],
        base_tree_leaves,
    )
    .expect("failed to instantiate compound tree from set of base trees [from_slices]")
}

fn instantiate_ctree_from_slices_with_configs<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity> {
    let base_trees = get_vector_of_base_trees_as_slices::<E, A, S, BaseTreeArity, SubTreeArity>(
        base_tree_leaves,
    );
    let vec_of_slices: Vec<&[u8]> = base_trees.iter().map(|x| &x[..]).collect();

    let vec_of_configs = (0..vec_of_slices.len())
        .map(|_| StoreConfig::default())
        .collect::<Vec<_>>();

    MerkleTree::<E, A, S, BaseTreeArity, SubTreeArity>::from_slices_with_configs(
        &vec_of_slices[..],
        base_tree_leaves,
        &vec_of_configs[..],
    )
    .expect("failed to instantiate compound tree [from_slices_with_configs]")
}

fn instantiate_ctree_from_store_configs<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity> {
    let distinguisher = "instantiate_ctree_from_store_configs";
    let temp_dir = tempfile::Builder::new()
        .prefix(distinguisher)
        .tempdir()
        .expect("can't create temp dir [from_store_configs]");

    // compute len for the base tree, as we are going to instantiate the compound tree from a set of base trees
    let len = get_merkle_tree_len_generic::<BaseTreeArity, U0, U0>(base_tree_leaves)
        .expect("can't get tree len [from_store_configs]");
    let row_count = get_merkle_tree_row_count(base_tree_leaves, BaseTreeArity::to_usize());

    let vec_of_configs = (0..SubTreeArity::to_usize())
        .map(|index| {
            let replica = format!(
                "{}-{}-{}-{}-{}-replica",
                distinguisher, index, base_tree_leaves, len, row_count,
            );

            let config = StoreConfig::new(
                temp_dir.path(),
                replica,
                StoreConfig::default_rows_to_discard(base_tree_leaves, BaseTreeArity::to_usize()),
            );
            // we need to instantiate a tree in order to dump tree data into Disk-based storages and bind them to configs
            instantiate_new_with_config::<E, A, S, BaseTreeArity>(
                base_tree_leaves,
                Some(config.clone()),
            );
            config
        })
        .collect::<Vec<_>>();

    MerkleTree::from_store_configs(base_tree_leaves, &vec_of_configs)
        .expect("failed to instantiate compound tree [from_store_configs]")
}

/// Test executor
fn run_test_compound_tree<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
>(
    constructor: fn(usize) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity>,
    base_tree_leaves: usize,
    expected_leaves: usize,
    expected_len: usize,
    expected_root: E,
) {
    let compound_tree: MerkleTree<E, A, S, BaseTreeArity, SubTreeArity> =
        constructor(base_tree_leaves);
    test_disk_mmap_vec_tree_functionality::<E, A, S, BaseTreeArity, SubTreeArity, U0>(
        compound_tree,
        expected_leaves,
        expected_len,
        expected_root,
    );
}

/// Ultimately we cover the following list of constructors for compound trees:
/// - from_trees
/// - from_stores
/// - from_slices
/// - from_slices_with_configs
/// - from_store_configs
#[test]
fn test_compound_constructors() {
    fn run_tests<E: Element, A: Algorithm<E>, S: Store<E>>(root: E) {
        let base_tree_leaves = 64;
        let expected_total_leaves = base_tree_leaves * 8;
        let len = get_merkle_tree_len_generic::<U8, U8, U0>(base_tree_leaves)
            .expect("[test_compound_constructors] couldn't compute Merkle Tree len");

        run_test_compound_tree::<E, A, S, U8, U8>(
            instantiate_ctree_from_trees,
            base_tree_leaves,
            expected_total_leaves,
            len,
            root,
        );

        run_test_compound_tree::<E, A, S, U8, U8>(
            instantiate_ctree_from_stores,
            base_tree_leaves,
            expected_total_leaves,
            len,
            root,
        );

        run_test_compound_tree::<E, A, S, U8, U8>(
            instantiate_ctree_from_slices,
            base_tree_leaves,
            expected_total_leaves,
            len,
            root,
        );

        run_test_compound_tree::<E, A, S, U8, U8>(
            instantiate_ctree_from_slices_with_configs,
            base_tree_leaves,
            expected_total_leaves,
            len,
            root,
        );
    }
    let root_xor128 = TestItem::from_slice(&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
    run_tests::<TestItemType, TestXOR128, VecStore<TestItemType>>(root_xor128);
    // TODO: investigate why these tests fail
    //run_tests::<TestItemType, TestXOR128, DiskStore<TestItemType>>(root_xor128);
    //run_tests::<TestItemType, TestXOR128, MmapStore<TestItemType>>(root_xor128);

    let root_sha256 = TestItem::from_slice(&[
        32, 129, 168, 134, 58, 233, 155, 225, 88, 230, 247, 63, 18, 38, 194, 230,
    ]);
    run_tests::<TestItemType, TestSha256Hasher, VecStore<TestItemType>>(root_sha256);
    // TODO: investigate why these tests fail
    //run_tests::<TestItemType, TestSha256Hasher, DiskStore<TestItemType>>(root_sha256);
    //run_tests::<TestItemType, TestSha256Hasher, MmapStore<TestItemType>>(root_sha256);

    let base_tree_leaves = 64;
    let expected_total_leaves = base_tree_leaves * 8;
    let len = get_merkle_tree_len_generic::<U8, U8, U0>(base_tree_leaves)
        .expect("[test_compound_constructors] couldn't compute Merkle Tree len");

    // this instantiator works only with DiskStore / MmapStore trees
    run_test_compound_tree::<TestItemType, TestXOR128, DiskStore<TestItemType>, U8, U8>(
        instantiate_ctree_from_store_configs,
        base_tree_leaves,
        expected_total_leaves,
        len,
        root_xor128,
    );
    run_test_compound_tree::<TestItemType, TestSha256Hasher, DiskStore<TestItemType>, U8, U8>(
        instantiate_ctree_from_store_configs,
        base_tree_leaves,
        expected_total_leaves,
        len,
        root_sha256,
    );

    // same instantiator for MmapStore...
    run_test_compound_tree::<TestItemType, TestXOR128, MmapStore<TestItemType>, U8, U8>(
        instantiate_ctree_from_store_configs,
        base_tree_leaves,
        expected_total_leaves,
        len,
        root_xor128,
    );
    run_test_compound_tree::<TestItemType, TestSha256Hasher, MmapStore<TestItemType>, U8, U8>(
        instantiate_ctree_from_store_configs,
        base_tree_leaves,
        expected_total_leaves,
        len,
        root_sha256,
    );
}
--------------------------------------------------------------------------------
/tests/test_compound_compound_constructors.rs:
--------------------------------------------------------------------------------
#![cfg(not(tarpaulin_include))]
pub mod common;

use typenum::{Unsigned, U0, U2, U8};

use merkletree::hash::Algorithm;
use merkletree::merkle::{get_merkle_tree_len_generic, Element, MerkleTree};
use merkletree::store::{DiskStore, MmapStore, Store, StoreConfig, VecStore};

use crate::common::{
    get_vector_of_base_trees, instantiate_new, instantiate_new_with_config,
    test_disk_mmap_vec_tree_functionality, TestItem, TestItemType, TestSha256Hasher, TestXOR128,
};

/// Compound-compound tree constructors
fn instantiate_cctree_from_sub_trees<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
    TopTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity, TopTreeArity> {
    let compound_trees = (0..TopTreeArity::to_usize())
        .map(|_| {
            let base_trees =
                get_vector_of_base_trees::<E, A, S, BaseTreeArity, SubTreeArity>(base_tree_leaves);
            MerkleTree::from_trees(base_trees)
                .expect("failed to instantiate compound tree [instantiate_cctree_from_sub_trees]")
        })
        .collect();

    MerkleTree::from_sub_trees(compound_trees)
        .expect("failed to instantiate compound-compound tree from compound trees [instantiate_cctree_from_sub_trees]")
}

fn instantiate_cctree_from_sub_trees_as_trees<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
    TopTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity, TopTreeArity> {
    let base_trees = (0..TopTreeArity::to_usize())
        .flat_map(|_| {
            (0..SubTreeArity::to_usize())
                .map(|_| instantiate_new(base_tree_leaves, None))
                .collect::<Vec<MerkleTree<E, A, S, BaseTreeArity>>>()
        })
        .collect();

    MerkleTree::from_sub_trees_as_trees(base_trees)
        .expect("failed to instantiate compound-compound tree from set of base trees [instantiate_cctree_from_sub_trees_as_trees]")
}

fn instantiate_cctree_from_sub_tree_store_configs<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
    TopTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity, TopTreeArity> {
    let distinguisher = "instantiate_cctree_from_sub_tree_store_configs";
    let temp_dir = tempfile::Builder::new()
        .prefix(distinguisher)
        .tempdir()
        .expect("can't create temp dir [instantiate_cctree_from_sub_tree_store_configs]");

    // compute len for the base tree, as we are going to instantiate the compound tree from a set of base trees
    let len = get_merkle_tree_len_generic::<BaseTreeArity, U0, U0>(base_tree_leaves)
        .expect("can't get tree len [instantiate_cctree_from_sub_tree_store_configs]");

    let configs = (0..TopTreeArity::to_usize())
        .flat_map(|j| {
            (0..SubTreeArity::to_usize())
                .map(|i| {
                    let replica = format!(
                        "{}-{}-{}-{}-{}-replica",
                        distinguisher, i, j, base_tree_leaves, len,
                    );

                    // we attempt to discard all intermediate layers, except the bottom one (the set of leaves) and the top-level root of the base tree
                    let config = StoreConfig::new(temp_dir.path(), replica, 0);
                    // we need to instantiate a tree in order to dump tree data into Disk-based storages and bind them to configs
                    instantiate_new_with_config::<E, A, S, BaseTreeArity>(
                        base_tree_leaves,
                        Some(config.clone()),
                    );
                    config
                })
                .collect::<Vec<_>>()
        })
        .collect::<Vec<_>>();

    MerkleTree::from_sub_tree_store_configs(base_tree_leaves, &configs)
        .expect("failed to instantiate compound-compound tree [instantiate_cctree_from_sub_tree_store_configs]")
}

fn instantiate_cctree_from_sub_tree_readonly_store_configs<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
    TopTreeArity: Unsigned,
>(
    base_tree_leaves: usize,
) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity, TopTreeArity> {
    let distinguisher = "instantiate_cctree_from_sub_tree_readonly_store_configs";
    let temp_dir = tempfile::Builder::new()
        .prefix(distinguisher)
        .tempdir()
        .expect("can't create temp dir [instantiate_cctree_from_sub_tree_readonly_store_configs]");

    // compute len for the base tree, as we are going to instantiate the compound tree from a set of base trees
    let len = get_merkle_tree_len_generic::<BaseTreeArity, U0, U0>(base_tree_leaves)
        .expect("can't get tree len [instantiate_cctree_from_sub_tree_readonly_store_configs]");

    let configs = (0..TopTreeArity::to_usize())
        .flat_map(|j| {
            (0..SubTreeArity::to_usize())
                .map(|i| {
                    let replica = format!(
                        "{}-{}-{}-{}-{}-replica",
                        distinguisher, i, j, base_tree_leaves, len,
                    );

                    // we attempt to discard all intermediate layers, except the bottom one (the set of leaves) and the top-level root of the base tree
                    let config = StoreConfig::new(temp_dir.path(), replica, 0);
                    // we need to instantiate a tree in order to dump tree data into Disk-based storages and bind them to configs
                    instantiate_new_with_config::<E, A, S, BaseTreeArity>(
                        base_tree_leaves,
                        Some(config.clone()),
                    );
                    config
                })
                .collect::<Vec<_>>()
        })
        .collect::<Vec<_>>();

    // once we have our sub trees instantiated and the storages filled with data, let's make them read-only
    for config in configs.clone() {
        let data_path = StoreConfig::data_path(&config.path, &config.id);
        let metadata = data_path.metadata().expect("can't get metadata");
        let mut permissions = metadata.permissions();
        permissions.set_readonly(true);
        std::fs::set_permissions(&data_path, permissions).expect("couldn't apply permissions");
    }

    MerkleTree::from_sub_tree_store_configs(base_tree_leaves, &configs)
        .expect("failed to instantiate compound-compound tree [instantiate_cctree_from_sub_tree_readonly_store_configs]")
}

/// Test executor
fn run_test_compound_compound_tree<
    E: Element,
    A: Algorithm<E>,
    S: Store<E>,
    BaseTreeArity: Unsigned,
    SubTreeArity: Unsigned,
    TopTreeArity: Unsigned,
>(
    constructor: fn(usize) -> MerkleTree<E, A, S, BaseTreeArity, SubTreeArity, TopTreeArity>,
    base_tree_leaves: usize,
    expected_leaves: usize,
    expected_len: usize,
    expected_root: E,
) {
    let compound_tree: MerkleTree<E, A, S, BaseTreeArity, SubTreeArity, TopTreeArity> =
        constructor(base_tree_leaves);
    test_disk_mmap_vec_tree_functionality::<E, A, S, BaseTreeArity, SubTreeArity, TopTreeArity>(
        compound_tree,
        expected_leaves,
        expected_len,
        expected_root,
    );
}

188 | /// Ultimately we cover the following list of constructors for compound-compound trees:
189 | /// - from_sub_trees
190 | /// - from_sub_trees_as_trees
191 | /// - from_sub_tree_store_configs
192 | #[test]
193 | fn test_compound_compound_constructors() {
194 | fn run_test<E: Element, A: Algorithm<E>, S: Store<E>>(root: E) {
195 | let base_tree_leaves = 64;
196 | let expected_total_leaves = base_tree_leaves * 8 * 2;
197 | let len = get_merkle_tree_len_generic::<U8, U8, U2>(base_tree_leaves)
198 | .expect("[test_compound_compound_constructors] couldn't compute Merkle Tree len");
199 |
200 | run_test_compound_compound_tree::<E, A, S, U8, U8, U2>(
201 | instantiate_cctree_from_sub_trees,
202 | base_tree_leaves,
203 | expected_total_leaves,
204 | len,
205 | root,
206 | );
207 |
208 | run_test_compound_compound_tree::<E, A, S, U8, U8, U2>(
209 | instantiate_cctree_from_sub_trees_as_trees,
210 | base_tree_leaves,
211 | expected_total_leaves,
212 | len,
213 | root,
214 | );
215 | }
216 |
217 | let root_xor128 = TestItem::from_slice(&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
218 | run_test::<TestItemType, TestXOR128, VecStore<TestItemType>>(root_xor128);
219 |
220 | let root_sha256 = TestItem::from_slice(&[
221 | 52, 152, 123, 224, 174, 42, 152, 12, 199, 4, 105, 245, 176, 59, 230, 86,
222 | ]);
223 | run_test::<TestItemType, TestSha256Hasher, VecStore<TestItemType>>(root_sha256);
224 |
225 | let base_tree_leaves = 64;
226 | let expected_total_leaves = base_tree_leaves * 8 * 2;
227 | let len = get_merkle_tree_len_generic::<U8, U8, U2>(base_tree_leaves)
228 | .expect("[test_compound_compound_constructors] couldn't compute Merkle Tree len");
229 |
230 | // this instantiator works only with DiskStore / MmapStore trees
231 | run_test_compound_compound_tree::<TestItemType, TestXOR128, DiskStore<TestItemType>, U8, U8, U2>(
232 | instantiate_cctree_from_sub_tree_store_configs,
233 | base_tree_leaves,
234 | expected_total_leaves,
235 | len,
236 | root_xor128,
237 | );
238 |
239 | run_test_compound_compound_tree::<
240 | TestItemType,
241 | TestSha256Hasher,
242 | DiskStore<TestItemType>,
243 | U8,
244 | U8,
245 | U2,
246 | >(
247 | instantiate_cctree_from_sub_tree_store_configs,
248 | base_tree_leaves,
249 | expected_total_leaves,
250 | len,
251 | root_sha256,
252 | );
253 |
254 | // same instantiator for MmapStore
255 | run_test_compound_compound_tree::<TestItemType, TestXOR128, MmapStore<TestItemType>, U8, U8, U2>(
256 | instantiate_cctree_from_sub_tree_store_configs,
257 | base_tree_leaves,
258 | expected_total_leaves,
259 | len,
260 | root_xor128,
261 | );
262 |
263 | run_test_compound_compound_tree::<
264 | TestItemType,
265 | TestSha256Hasher,
266 | MmapStore<TestItemType>,
267 | U8,
268 | U8,
269 | U2,
270 | >(
271 | instantiate_cctree_from_sub_tree_store_configs,
272 | base_tree_leaves,
273 | expected_total_leaves,
274 | len,
275 | root_sha256,
276 | );
277 | }
278 |
279 | #[test]
280 | fn test_with_readonly_disk_storages() {
281 | env_logger::init();
282 |
283 | let base_tree_leaves = 64;
284 | let expected_total_leaves = base_tree_leaves * 8 * 2;
285 | let len = get_merkle_tree_len_generic::<U8, U8, U2>(base_tree_leaves)
286 | .expect("[test_with_readonly_disk_storages] couldn't compute Merkle Tree len");
287 | let root_sha256 = TestItem::from_slice(&[
288 | 52, 152, 123, 224, 174, 42, 152, 12, 199, 4, 105, 245, 176, 59, 230, 86,
289 | ]);
290 |
291 | run_test_compound_compound_tree::<
292 | TestItemType,
293 | TestSha256Hasher,
294 | DiskStore<TestItemType>,
295 | U8,
296 | U8,
297 | U2,
298 | >(
299 | instantiate_cctree_from_sub_tree_readonly_store_configs,
300 | base_tree_leaves,
301 | expected_total_leaves,
302 | len,
303 | root_sha256,
304 | );
305 | }
306 |
--------------------------------------------------------------------------------
/tests/common.rs:
--------------------------------------------------------------------------------
1 | #![cfg(not(tarpaulin_include))]
2 | use std::fmt;
3 | use std::hash::Hasher;
4 | use std::io::Write;
5 |
6 | use sha2::{Digest, Sha256};
7 | use typenum::Unsigned;
8 |
9 | use merkletree::hash::{Algorithm, Hashable};
10 | use merkletree::merkle::{Element, MerkleTree};
11 | use merkletree::store::{DiskStore, LevelCacheStore, Store, StoreConfig};
12 |
13 | /// These are the common utilities that we use in our integration tests.
14 | ///
15 | /// In order to check that a particular merkle tree works as expected, we need the following:
16 | ///
17 | /// - the actual test logic, which evaluates whether the implemented functionality works as expected;
18 | /// - an implementation of Element, which we use as the element type of the tree while testing;
19 | /// - implementations of Hasher and Algorithm, which we use for computing leaves, nodes, the root
20 | /// and inclusion proofs while testing;
21 | /// - a generator of arbitrary datasets that can be used as a source of data for building a tree.
22 | ///
23 | /// The implementation of the MerkleTree abstraction is rather dense. Trees can be instantiated via 23 different
24 | /// constructors (each constructor is part of the public API), a tree can be of various types (base, compound,
25 | /// compound-compound), each type can have arbitrary arity, and a tree can additionally be backed by 4 different
26 | /// storages, each with its own specifics.
27 | ///
28 | /// Having that in mind, and considering that writing tests for every possible combination of parameters would lead to
29 | /// a huge amount of code to maintain, we provide integration tests that cover the following cases:
30 | ///
31 | /// - testing instantiation of trees via all constructors that are part of the public API;
32 | /// - ensuring that each tree has the expected number of leaves, the expected length and the expected root;
33 | /// - ensuring that an inclusion proof can be successfully created and verified for each tree leaf;
34 | ///
35 | /// What is not covered / evaluated:
36 | ///
37 | /// - checking that the arity tests work for each base / compound / compound-compound constructor;
38 | /// - checking that the arity tests work for the Disk / Mmap / LevelCache storages;
39 | /// - instantiation of a compound tree using each base constructor;
40 | /// - instantiation of a compound-compound tree using each base and each compound constructor;
41 | /// - instantiation of DiskStore and MmapStore compound trees;
42 | /// - instantiation of DiskStore and MmapStore compound-compound trees;
43 | /// - instantiation of a compound tree using 'from_store_configs' with custom configurations;
44 | /// - instantiation of a compound-compound tree using 'from_sub_tree_store_configs' with custom configurations;
45 | /// - instantiation of a LevelCacheStore base tree using the 'from_tree_slice_with_config' constructor;
46 | /// - instantiation of LevelCacheStore compound trees using "regular" compound constructors ('from_slices_with_configs', 'from_store_configs').
47 |
48 | /// Implementation of the Element abstraction that we use in our integration tests
49 | #[derive(PartialEq, Eq, PartialOrd, Ord, Copy, Clone, Debug, Default)]
50 | pub struct TestItem([u8; SIZE]);
51 | pub const SIZE: usize = 0x10;
52 |
53 | // We introduce this wrapper type and actually implement Element for it
54 | // just to avoid writing .clone() all the time in tests
55 | pub type TestItemType = TestItem;
56 |
57 | impl AsRef<[u8]> for TestItem {
58 | fn as_ref(&self) -> &[u8] {
59 | &self.0
60 | }
61 | }
62 |
63 | impl Element for TestItemType {
64 | fn byte_len() -> usize {
65 | SIZE
66 | }
67 | fn from_slice(bytes: &[u8]) -> Self {
68 | assert_eq!(bytes.len(), Self::byte_len());
69 | let mut el = [0u8; SIZE];
70 | el[..].copy_from_slice(bytes);
71 | TestItem(el)
72 | }
73 | fn copy_to_slice(&self, bytes: &mut [u8]) {
74 | bytes.copy_from_slice(&self.0);
75 | }
76 | }
77 |
78 | /// XOR128 implementation of the Algorithm abstraction that we use in our integration tests
79 | pub struct TestXOR128 {
80 | data: TestItem,
81 | i: usize,
82 | }
83 |
84 | impl TestXOR128 {
85 | pub fn new() -> TestXOR128 {
86 | TestXOR128 {
87 | data: TestItem([0u8; SIZE]),
88 | i: 0,
89 | }
90 | }
91 | }
92 |
93 | impl Hasher for TestXOR128 {
94 | fn finish(&self) -> u64 {
95 | // FIXME: contract is broken by design
96 | unimplemented!(
97 | "Hasher's contract (finish function is not used) is deliberately broken by design"
98 | )
99 | }
100 | fn write(&mut self, bytes: &[u8]) {
101 | for x in bytes {
102 | self.data.0[self.i & 15] ^= *x;
103 | self.i += 1;
104 | }
105 | }
106 | }
107 |
108 | impl Default for TestXOR128 {
109 | fn default() -> Self {
110 | TestXOR128::new()
111 | }
112 | }
113 |
114 | impl Algorithm<TestItemType> for TestXOR128 {
115 | fn hash(&mut self) -> TestItem {
116 | self.data
117 | }
118 | }
119 |
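// A tiny illustrative check, added for clarity (it mirrors the reset/write/hash
// cycle with which MerkleTree drives an Algorithm): XOR-ing the same 16-byte
// block in twice cancels out, so the running digest returns to all zeroes.
#[test]
fn example_xor128_write_is_self_inverse() {
    let mut a = TestXOR128::new();
    a.write(&[0xAB; SIZE]);
    a.write(&[0xAB; SIZE]);
    assert_eq!(a.hash(), TestItem([0u8; SIZE]));
}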
120 | /// SHA256 implementation of the Algorithm abstraction that we use in our integration tests
121 | pub struct TestSha256Hasher {
122 | engine: Sha256,
123 | }
124 |
125 | impl TestSha256Hasher {
126 | pub fn new() -> TestSha256Hasher {
127 | TestSha256Hasher {
128 | engine: Sha256::new(),
129 | }
130 | }
131 | }
132 |
133 | impl fmt::Debug for TestSha256Hasher {
134 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
135 | f.write_str("Sha256Hasher")
136 | }
137 | }
138 |
139 | impl Default for TestSha256Hasher {
140 | fn default() -> Self {
141 | TestSha256Hasher::new()
142 | }
143 | }
144 |
145 | impl Hasher for TestSha256Hasher {
146 | // FIXME: contract is broken by design
147 | fn finish(&self) -> u64 {
148 | unimplemented!(
149 | "Hasher's contract (finish function is not used) is deliberately broken by design"
150 | )
151 | }
152 |
153 | fn write(&mut self, bytes: &[u8]) {
154 | self.engine.update(bytes)
155 | }
156 | }
157 |
158 | impl Algorithm<TestItemType> for TestSha256Hasher {
159 | fn hash(&mut self) -> TestItem {
160 | let mut result = TestItem::default();
161 | let item_size = result.0.len();
162 | let hash_output = self.engine.clone().finalize().to_vec();
163 | self.engine.reset();
164 | if item_size < hash_output.len() {
165 | result
166 | .0
167 | .copy_from_slice(&hash_output.as_slice()[0..item_size]);
168 | } else {
169 | result.0.copy_from_slice(hash_output.as_slice())
170 | }
171 | result
172 | }
173 | }
174 |
175 | /// Dataset generators
176 | ///
177 | /// We need to provide 3 different datasets:
178 | /// - a vector of iterable elements (Vec<E>);
179 | /// - a vector of iterable and hashable elements (Vec<usize>, consumable via I: IntoIterator);
180 | /// - datasets based on raw serialization to a byte-slice;
181 | ///
182 | /// because various MerkleTree constructors have specific requirements.
183 |
184 | // generate dataset of iterable elements
185 | pub fn generate_vector_of_elements<E: Element>(leaves: usize) -> Vec<E> {
186 | let result = (0..leaves).map(|index| {
187 | // we are ok with usize -> u8 conversion problems, since we just need a predictable dataset
188 | let vector: Vec<u8> = (0..E::byte_len()).map(|x| (index + x) as u8).collect();
189 | E::from_slice(vector.as_slice())
190 | });
191 | result.collect()
192 | }
193 |
194 | // generate dataset of iterable and hashable elements
195 | pub fn generate_vector_of_usizes(leaves: usize) -> Vec<usize> {
196 | (0..leaves).map(|index| index * 93).collect()
197 | }
198 |
199 | // generate dataset of hashable (usize) elements and serialize it at once
200 | pub fn generate_byte_slice_tree<E: Element, A: Algorithm<E>>(leaves: usize) -> Vec<u8> {
201 | let mut a = A::default();
202 | let mut a2 = A::default();
203 |
204 | let dataset: Vec<u8> = generate_vector_of_usizes(leaves)
205 | .iter()
206 | .map(|x| {
207 | a.reset();
208 | x.hash(&mut a);
209 | a.hash()
210 | })
211 | .take(leaves)
212 | .flat_map(|item| {
213 | a2.reset();
214 | a2.leaf(item).as_ref().to_vec()
215 | })
216 | .collect();
217 |
218 | dataset
219 | }
220 |
221 | /// Actual tests
222 | pub fn test_disk_mmap_vec_tree_functionality<
223 | E: Element,
224 | A: Algorithm<E>,
225 | S: Store<E>,
226 | BaseTreeArity: Unsigned,
227 | SubTreeArity: Unsigned,
228 | TopTreeArity: Unsigned,
229 | >(
230 | tree: MerkleTree<E, A, S, BaseTreeArity, SubTreeArity, TopTreeArity>,
231 | expected_leaves: usize,
232 | expected_len: usize,
233 | expected_root: E,
234 | ) {
235 | assert_eq!(tree.leafs(), expected_leaves);
236 | assert_eq!(tree.len(), expected_len);
237 | assert_eq!(tree.root(), expected_root);
238 |
239 | for index in 0..tree.leafs() {
240 | let p = tree
241 | .gen_proof(index)
242 | .expect("[test_disk_mmap_vec_tree_functionality] couldn't generate Merkle Proof");
243 | assert!(p
244 | .validate::<A>()
245 | .expect("[test_disk_mmap_vec_tree_functionality] failed to validate"));
246 | }
247 | }
248 |
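// An end-to-end sketch added for illustration (it assumes an 8-leaf binary
// VecStore-backed tree; everything else mirrors the helpers above): generate
// a dataset, build a tree over it, then create and validate an inclusion
// proof for one of the leaves.
#[test]
fn example_generators_and_proofs_compose() {
    let leaves = generate_vector_of_elements::<TestItemType>(8);
    let tree: MerkleTree<
        TestItemType,
        TestXOR128,
        merkletree::store::VecStore<TestItemType>,
        typenum::U2,
    > = MerkleTree::new(leaves).expect("failed to build example tree");
    let proof = tree.gen_proof(3).expect("failed to generate proof");
    assert!(proof
        .validate::<TestXOR128>()
        .expect("failed to validate example proof"));
}

249 | pub fn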
test_levelcache_tree_functionality< 250 | E: Element, 251 | A: Algorithm, 252 | BaseTreeArity: Unsigned, 253 | SubTreeArity: Unsigned, 254 | TopTreeArity: Unsigned, 255 | >( 256 | tree: MerkleTree< 257 | E, 258 | A, 259 | LevelCacheStore, 260 | BaseTreeArity, 261 | SubTreeArity, 262 | TopTreeArity, 263 | >, 264 | rows_to_discard: Option, 265 | expected_leaves: usize, 266 | expected_len: usize, 267 | expected_root: E, 268 | ) { 269 | assert_eq!(tree.leafs(), expected_leaves); 270 | assert_eq!(tree.len(), expected_len); 271 | assert_eq!(tree.root(), expected_root); 272 | 273 | for index in 0..tree.leafs() { 274 | let p = tree 275 | .gen_cached_proof(index, rows_to_discard) 276 | .expect("[test_levelcache_tree_functionality] couldn't generate cached Merkle Proof"); 277 | assert!(p 278 | .validate::() 279 | .expect("[test_levelcache_tree_functionality] failed to validate")); 280 | } 281 | } 282 | 283 | /// Utilities 284 | pub fn serialize_tree, S: Store, U: Unsigned>( 285 | tree: MerkleTree, 286 | ) -> Vec { 287 | let data = tree.data().expect("can't get tree's data [serialize_tree]"); 288 | let data: Vec = data 289 | .read_range(0..data.len()) 290 | .expect("can't read actual data [serialize_tree]"); 291 | let mut serialized_tree = vec![0u8; E::byte_len() * data.len()]; 292 | let mut start = 0; 293 | let mut end = E::byte_len(); 294 | for element in data { 295 | element.copy_to_slice(&mut serialized_tree[start..end]); 296 | start += E::byte_len(); 297 | end += E::byte_len(); 298 | } 299 | serialized_tree 300 | } 301 | 302 | pub fn instantiate_new, S: Store, U: Unsigned>( 303 | leaves: usize, 304 | _config: Option, 305 | ) -> MerkleTree { 306 | let dataset = generate_vector_of_elements::(leaves); 307 | MerkleTree::new(dataset).expect("failed to instantiate tree [new]") 308 | } 309 | 310 | pub fn instantiate_new_with_config, S: Store, U: Unsigned>( 311 | leaves: usize, 312 | config: Option, 313 | ) -> MerkleTree { 314 | let dataset = generate_vector_of_elements::(leaves); 315 | MerkleTree::new_with_config( 316 | dataset, 317 | config.expect("can't get tree's config [new_with_config]"), 318 | ) 319 | .expect("failed to instantiate tree [new_with_config]") 320 | } 321 | 322 | pub fn dump_tree_data_to_replica( 323 | leaves: usize, 324 | len: usize, 325 | config: &StoreConfig, 326 | replica_file: &mut std::fs::File, 327 | ) { 328 | // Dump tree data to disk 329 | let store = DiskStore::new_with_config(len, BaseTreeArity::to_usize(), config.clone()) 330 | .expect("failed to open store [dump_tree_data_to_replica]"); 331 | 332 | // Use that data store as the replica (concat the data to the replica_path) 333 | let data: Vec = store 334 | .read_range(std::ops::Range { 335 | start: 0, 336 | end: leaves, 337 | }) 338 | .expect("failed to read store [dump_tree_data_to_replica]"); 339 | for element in data { 340 | let mut vector = vec![0u8; E::byte_len()]; 341 | element.copy_to_slice(vector.as_mut_slice()); 342 | replica_file 343 | .write_all(vector.as_slice()) 344 | .expect("failed to write replica data [dump_tree_data_to_replica]"); 345 | } 346 | } 347 | 348 | pub fn get_vector_of_base_trees_as_slices< 349 | E: Element, 350 | A: Algorithm, 351 | S: Store, 352 | BaseTreeArity: Unsigned, 353 | SubTreeArity: Unsigned, 354 | >( 355 | base_tree_leaves: usize, 356 | ) -> Vec> { 357 | (0..SubTreeArity::to_usize()) 358 | .map(|_| { 359 | let base_tree = instantiate_new::(base_tree_leaves, None); 360 | serialize_tree(base_tree) 361 | }) 362 | .collect() 363 | } 364 | 365 | pub fn get_vector_of_base_trees< 366 | E: 
Element, 367 | A: Algorithm, 368 | S: Store, 369 | BaseTreeArity: Unsigned, 370 | SubTreeArity: Unsigned, 371 | >( 372 | base_tree_leaves: usize, 373 | ) -> Vec> { 374 | (0..SubTreeArity::to_usize()) 375 | .map(|_| instantiate_new(base_tree_leaves, None)) 376 | .collect() 377 | } 378 | -------------------------------------------------------------------------------- /src/store/disk.rs: -------------------------------------------------------------------------------- 1 | use std::fs::{remove_file, File, OpenOptions}; 2 | use std::io::{copy, Seek, SeekFrom}; 3 | use std::iter::FromIterator; 4 | use std::marker::PhantomData; 5 | use std::ops; 6 | use std::path::Path; 7 | use std::sync::{Arc, RwLock}; 8 | 9 | use anyhow::{Context, Result}; 10 | use log::warn; 11 | use memmap2::MmapOptions; 12 | use positioned_io::{ReadAt, WriteAt}; 13 | use rayon::iter::*; 14 | use rayon::prelude::*; 15 | use tempfile::tempfile; 16 | use typenum::marker_traits::Unsigned; 17 | 18 | use crate::hash::Algorithm; 19 | use crate::merkle::{ 20 | get_merkle_tree_cache_size, get_merkle_tree_leafs, get_merkle_tree_len, log2_pow2, next_pow2, 21 | Element, 22 | }; 23 | use crate::store::{Store, StoreConfig, StoreConfigDataVersion, BUILD_CHUNK_NODES}; 24 | 25 | /// The Disk-only store is used to reduce memory to the minimum at the 26 | /// cost of build time performance. Most of its I/O logic is in the 27 | /// `store_copy_from_slice` and `store_read_range` functions. 28 | #[derive(Debug)] 29 | pub struct DiskStore { 30 | len: usize, 31 | elem_len: usize, 32 | _e: PhantomData, 33 | file: File, 34 | 35 | // This flag is useful only immediate after instantiation, which 36 | // is false if the store was newly initialized and true if the 37 | // store was loaded from already existing on-disk data. 38 | loaded_from_disk: bool, 39 | 40 | // We cache the `store.len()` call to avoid accessing disk unnecessarily. 41 | // Not to be confused with `len`, this saves the total size of the `store` 42 | // in bytes and the other one keeps track of used `E` slots in the `DiskStore`. 43 | store_size: usize, 44 | } 45 | 46 | impl Store for DiskStore { 47 | fn new_with_config(size: usize, branches: usize, config: StoreConfig) -> Result { 48 | let data_path = StoreConfig::data_path(&config.path, &config.id); 49 | 50 | // If the specified file exists, load it from disk. 51 | if Path::new(&data_path).exists() { 52 | return Self::new_from_disk(size, branches, &config); 53 | } 54 | 55 | // Otherwise, create the file and allow it to be the on-disk store. 
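// Creating with create_new(true) guarantees we never clobber an existing
// store file, and the set_len call below pre-allocates space for the full
// tree up front.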
56 | let file = OpenOptions::new() 57 | .write(true) 58 | .read(true) 59 | .create_new(true) 60 | .open(data_path)?; 61 | 62 | let store_size = E::byte_len() * size; 63 | file.set_len(store_size as u64)?; 64 | 65 | Ok(DiskStore { 66 | len: 0, 67 | elem_len: E::byte_len(), 68 | _e: Default::default(), 69 | file, 70 | loaded_from_disk: false, 71 | store_size, 72 | }) 73 | } 74 | 75 | fn new(size: usize) -> Result { 76 | let store_size = E::byte_len() * size; 77 | let file = tempfile()?; 78 | file.set_len(store_size as u64)?; 79 | 80 | Ok(DiskStore { 81 | len: 0, 82 | elem_len: E::byte_len(), 83 | _e: Default::default(), 84 | file, 85 | loaded_from_disk: false, 86 | store_size, 87 | }) 88 | } 89 | 90 | fn new_from_slice_with_config( 91 | size: usize, 92 | branches: usize, 93 | data: &[u8], 94 | config: StoreConfig, 95 | ) -> Result { 96 | ensure!( 97 | data.len() % E::byte_len() == 0, 98 | "data size must be a multiple of {}", 99 | E::byte_len() 100 | ); 101 | 102 | let mut store = Self::new_with_config(size, branches, config)?; 103 | 104 | // If the store was loaded from disk (based on the config 105 | // information, avoid re-populating the store at this point 106 | // since it can be assumed by the config that the data is 107 | // already correct). 108 | if !store.loaded_from_disk { 109 | store.store_copy_from_slice(0, data)?; 110 | store.len = data.len() / store.elem_len; 111 | } 112 | 113 | Ok(store) 114 | } 115 | 116 | fn new_from_slice(size: usize, data: &[u8]) -> Result { 117 | ensure!( 118 | data.len() % E::byte_len() == 0, 119 | "data size must be a multiple of {}", 120 | E::byte_len() 121 | ); 122 | 123 | let mut store = Self::new(size)?; 124 | store.store_copy_from_slice(0, data)?; 125 | store.len = data.len() / store.elem_len; 126 | 127 | Ok(store) 128 | } 129 | 130 | fn new_from_disk(size: usize, _branches: usize, config: &StoreConfig) -> Result { 131 | let data_path = StoreConfig::data_path(&config.path, &config.id); 132 | Self::new_from_disk_with_path(size, data_path) 133 | } 134 | 135 | fn write_at(&mut self, el: E, index: usize) -> Result<()> { 136 | self.store_copy_from_slice(index * self.elem_len, el.as_ref())?; 137 | self.len = std::cmp::max(self.len, index + 1); 138 | Ok(()) 139 | } 140 | 141 | fn copy_from_slice(&mut self, buf: &[u8], start: usize) -> Result<()> { 142 | ensure!( 143 | buf.len() % self.elem_len == 0, 144 | "buf size must be a multiple of {}", 145 | self.elem_len 146 | ); 147 | self.store_copy_from_slice(start * self.elem_len, buf)?; 148 | self.len = std::cmp::max(self.len, start + buf.len() / self.elem_len); 149 | 150 | Ok(()) 151 | } 152 | 153 | fn read_at(&self, index: usize) -> Result { 154 | let start = index * self.elem_len; 155 | let end = start + self.elem_len; 156 | 157 | let len = self.len * self.elem_len; 158 | ensure!(start < len, "start out of range {} >= {}", start, len); 159 | ensure!(end <= len, "end out of range {} > {}", end, len); 160 | 161 | Ok(E::from_slice(&self.store_read_range(start, end)?)) 162 | } 163 | 164 | fn read_into(&self, index: usize, buf: &mut [u8]) -> Result<()> { 165 | let start = index * self.elem_len; 166 | let end = start + self.elem_len; 167 | 168 | let len = self.len * self.elem_len; 169 | ensure!(start < len, "start out of range {} >= {}", start, len); 170 | ensure!(end <= len, "end out of range {} > {}", end, len); 171 | 172 | self.store_read_into(start, end, buf) 173 | } 174 | 175 | fn read_range_into(&self, start: usize, end: usize, buf: &mut [u8]) -> Result<()> { 176 | let start = start * self.elem_len; 177 | 
let end = end * self.elem_len; 178 | 179 | let len = self.len * self.elem_len; 180 | ensure!(start < len, "start out of range {} >= {}", start, len); 181 | ensure!(end <= len, "end out of range {} > {}", end, len); 182 | 183 | self.store_read_into(start, end, buf) 184 | } 185 | 186 | fn read_range(&self, r: ops::Range) -> Result> { 187 | let start = r.start * self.elem_len; 188 | let end = r.end * self.elem_len; 189 | 190 | let len = self.len * self.elem_len; 191 | ensure!(start < len, "start out of range {} >= {}", start, len); 192 | ensure!(end <= len, "end out of range {} > {}", end, len); 193 | 194 | Ok(self 195 | .store_read_range(start, end)? 196 | .chunks(self.elem_len) 197 | .map(E::from_slice) 198 | .collect()) 199 | } 200 | 201 | fn len(&self) -> usize { 202 | self.len 203 | } 204 | 205 | fn loaded_from_disk(&self) -> bool { 206 | self.loaded_from_disk 207 | } 208 | 209 | // Specifically, this method truncates an existing DiskStore and 210 | // formats the data in such a way that is compatible with future 211 | // access using LevelCacheStore::new_from_disk. 212 | fn compact( 213 | &mut self, 214 | branches: usize, 215 | config: StoreConfig, 216 | store_version: u32, 217 | ) -> Result { 218 | // Determine how many base layer leafs there are (and in bytes). 219 | let leafs = get_merkle_tree_leafs(self.len, branches)?; 220 | let data_width = leafs * self.elem_len; 221 | 222 | // Calculate how large the cache should be (based on the 223 | // config.rows_to_discard param). 224 | let cache_size = 225 | get_merkle_tree_cache_size(leafs, branches, config.rows_to_discard)? * self.elem_len; 226 | 227 | // The file cannot be compacted if the specified configuration 228 | // requires either 1) nothing to be cached, or 2) everything 229 | // to be cached. For #1, create a data store of leafs and do 230 | // not use that store as backing for the MT. For #2, avoid 231 | // calling this method. To resolve, provide a sane 232 | // configuration. 233 | ensure!( 234 | cache_size < self.len * self.elem_len && cache_size != 0, 235 | "Cannot compact with this configuration" 236 | ); 237 | 238 | let v1 = store_version == StoreConfigDataVersion::One as u32; 239 | let start: u64 = if v1 { data_width as u64 } else { 0 }; 240 | 241 | // Calculate cache start and updated size with repect to the 242 | // data size. 243 | let cache_start = self.store_size - cache_size; 244 | 245 | // Seek the reader to the start of the cached data. 246 | let mut reader = OpenOptions::new() 247 | .read(true) 248 | .open(StoreConfig::data_path(&config.path, &config.id))?; 249 | reader.seek(SeekFrom::Start(cache_start as u64))?; 250 | 251 | // Make sure the store file is opened for read/write. 252 | self.file = OpenOptions::new() 253 | .read(true) 254 | .write(true) 255 | .open(StoreConfig::data_path(&config.path, &config.id))?; 256 | 257 | // Seek the writer. 258 | self.file.seek(SeekFrom::Start(start))?; 259 | 260 | // Copy the data from the cached region to the writer. 261 | let written = copy(&mut reader, &mut self.file)?; 262 | ensure!(written == cache_size as u64, "Failed to copy all data"); 263 | if v1 { 264 | // Truncate the data on-disk to be the base layer data 265 | // followed by the cached data. 266 | self.file.set_len((data_width + cache_size) as u64)?; 267 | // Adjust our length for internal consistency. 268 | self.len = (data_width + cache_size) / self.elem_len; 269 | } else { 270 | // Truncate the data on-disk to be only the cached data. 
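// (With store version 2 the base layer is not kept in this file at all
// after compaction; see the StoreConfigDataVersion notes in src/store/mod.rs.)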
271 | self.file.set_len(cache_size as u64)?; 272 | 273 | // Adjust our length to be the cached elements only for 274 | // internal consistency. 275 | self.len = cache_size / self.elem_len; 276 | } 277 | 278 | // Sync and sanity check that we match on disk (this can be 279 | // removed if needed). 280 | self.sync()?; 281 | let metadata = self.file.metadata()?; 282 | let store_size = metadata.len() as usize; 283 | ensure!( 284 | self.len * self.elem_len == store_size, 285 | "Inconsistent metadata detected" 286 | ); 287 | 288 | Ok(true) 289 | } 290 | 291 | fn delete(config: StoreConfig) -> Result<()> { 292 | let path = StoreConfig::data_path(&config.path, &config.id); 293 | remove_file(&path).with_context(|| format!("Failed to delete {:?}", &path)) 294 | } 295 | 296 | fn is_empty(&self) -> bool { 297 | self.len == 0 298 | } 299 | 300 | fn push(&mut self, el: E) -> Result<()> { 301 | let len = self.len; 302 | ensure!( 303 | (len + 1) * self.elem_len <= self.store_size(), 304 | "not enough space, len: {}, E size {}, store len {}", 305 | len, 306 | self.elem_len, 307 | self.store_size() 308 | ); 309 | 310 | self.write_at(el, len) 311 | } 312 | 313 | fn sync(&self) -> Result<()> { 314 | self.file.sync_all().context("failed to sync file") 315 | } 316 | 317 | #[allow(unsafe_code)] 318 | fn process_layer, U: Unsigned>( 319 | &mut self, 320 | width: usize, 321 | level: usize, 322 | read_start: usize, 323 | write_start: usize, 324 | ) -> Result<()> { 325 | // Safety: this operation is safe becase it's a limited 326 | // writable region on the backing store managed by this type. 327 | let mut mmap = unsafe { 328 | let mut mmap_options = MmapOptions::new(); 329 | mmap_options 330 | .offset((write_start * E::byte_len()) as u64) 331 | .len(width * E::byte_len()) 332 | .map_mut(&self.file) 333 | }?; 334 | 335 | let data_lock = Arc::new(RwLock::new(self)); 336 | let branches = U::to_usize(); 337 | let shift = log2_pow2(branches); 338 | let write_chunk_width = (BUILD_CHUNK_NODES >> shift) * E::byte_len(); 339 | 340 | ensure!(BUILD_CHUNK_NODES % branches == 0, "Invalid chunk size"); 341 | Vec::from_iter((read_start..read_start + width).step_by(BUILD_CHUNK_NODES)) 342 | .into_par_iter() 343 | .zip(mmap.par_chunks_mut(write_chunk_width)) 344 | .try_for_each(|(chunk_index, write_mmap)| -> Result<()> { 345 | let chunk_size = std::cmp::min(BUILD_CHUNK_NODES, read_start + width - chunk_index); 346 | 347 | let chunk_nodes = { 348 | // Read everything taking the lock once. 349 | data_lock 350 | .read() 351 | .expect("[process_layer] error occurred while thread blocking") 352 | .read_range(chunk_index..chunk_index + chunk_size)? 353 | }; 354 | 355 | let nodes_size = (chunk_nodes.len() / branches) * E::byte_len(); 356 | let hashed_nodes_as_bytes = chunk_nodes.chunks(branches).fold( 357 | Vec::with_capacity(nodes_size), 358 | |mut acc, nodes| { 359 | let h = A::default().multi_node(nodes, level); 360 | acc.extend_from_slice(h.as_ref()); 361 | acc 362 | }, 363 | ); 364 | 365 | // Check that we correctly pre-allocated the space. 366 | let hashed_nodes_as_bytes_len = hashed_nodes_as_bytes.len(); 367 | ensure!( 368 | hashed_nodes_as_bytes.len() == chunk_size / branches * E::byte_len(), 369 | "Invalid hashed node length" 370 | ); 371 | 372 | write_mmap[0..hashed_nodes_as_bytes_len].copy_from_slice(&hashed_nodes_as_bytes); 373 | 374 | Ok(()) 375 | }) 376 | } 377 | 378 | // DiskStore specific merkle-tree build. 
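// Note: unlike the default Store::build in src/store/mod.rs, this override
// has no build_small_tree fast path; every layer goes through the
// mmap-backed process_layer implementation above.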
379 | fn build, U: Unsigned>( 380 | &mut self, 381 | leafs: usize, 382 | row_count: usize, 383 | _config: Option, 384 | ) -> Result { 385 | let branches = U::to_usize(); 386 | ensure!( 387 | next_pow2(branches) == branches, 388 | "branches MUST be a power of 2" 389 | ); 390 | ensure!(Store::len(self) == leafs, "Inconsistent data"); 391 | ensure!(leafs % 2 == 0, "Leafs must be a power of two"); 392 | 393 | // Process one `level` at a time of `width` nodes. Each level has half the nodes 394 | // as the previous one; the first level, completely stored in `data`, has `leafs` 395 | // nodes. We guarantee an even number of nodes per `level`, duplicating the last 396 | // node if necessary. 397 | let mut level: usize = 0; 398 | let mut width = leafs; 399 | let mut level_node_index = 0; 400 | 401 | let shift = log2_pow2(branches); 402 | 403 | while width > 1 { 404 | // Start reading at the beginning of the current level, and writing the next 405 | // level immediate after. `level_node_index` keeps track of the current read 406 | // starts, and width is updated accordingly at each level so that we know where 407 | // to start writing. 408 | let (read_start, write_start) = if level == 0 { 409 | // Note that we previously asserted that data.len() == leafs. 410 | (0, Store::len(self)) 411 | } else { 412 | (level_node_index, level_node_index + width) 413 | }; 414 | 415 | self.process_layer::(width, level, read_start, write_start)?; 416 | 417 | level_node_index += width; 418 | level += 1; 419 | width >>= shift; // width /= branches; 420 | 421 | // When the layer is complete, update the store length 422 | // since we know the backing file was updated outside of 423 | // the store interface. 424 | self.set_len(Store::len(self) + width); 425 | } 426 | 427 | // Ensure every element is accounted for. 428 | ensure!( 429 | Store::len(self) == get_merkle_tree_len(leafs, branches)?, 430 | "Invalid merkle tree length" 431 | ); 432 | 433 | ensure!(row_count == level + 1, "Invalid tree row_count"); 434 | // The root isn't part of the previous loop so `row_count` is 435 | // missing one level. 436 | 437 | // Return the root 438 | self.last() 439 | } 440 | } 441 | 442 | impl DiskStore { 443 | pub fn new_from_disk_with_path>(size: usize, data_path: P) -> Result { 444 | ensure!(data_path.as_ref().exists(), "[DiskStore] new_from_disk constructor can be used only for instantiating already existing storages"); 445 | 446 | let file = match OpenOptions::new().write(true).read(true).open(&data_path) { 447 | Ok(file) => file, 448 | Err(e) => { 449 | if e.kind() == std::io::ErrorKind::PermissionDenied { 450 | warn!( 451 | "[DiskStore] Permission denied occurred. Try to open storage as read-only" 452 | ); 453 | } 454 | OpenOptions::new() 455 | .write(false) 456 | .read(true) 457 | .open(&data_path)? 458 | } 459 | }; 460 | 461 | let metadata = file.metadata()?; 462 | let store_size = metadata.len() as usize; 463 | 464 | // Sanity check. 465 | ensure!( 466 | store_size == size * E::byte_len(), 467 | "Invalid formatted file provided. Expected {} bytes, found {} bytes", 468 | size * E::byte_len(), 469 | store_size 470 | ); 471 | 472 | Ok(DiskStore { 473 | len: size, 474 | elem_len: E::byte_len(), 475 | _e: Default::default(), 476 | file, 477 | loaded_from_disk: true, 478 | store_size, 479 | }) 480 | } 481 | 482 | fn set_len(&mut self, len: usize) { 483 | self.len = len; 484 | } 485 | 486 | // 'store_range' must be the total number of elements in the store 487 | // (e.g. tree.len()). 
Arity/branches is ignored since a 488 | // DiskStore's size is related only to the number of elements in 489 | // the tree. 490 | pub fn is_consistent( 491 | store_range: usize, 492 | _branches: usize, 493 | config: &StoreConfig, 494 | ) -> Result { 495 | let data_path = StoreConfig::data_path(&config.path, &config.id); 496 | 497 | let file = File::open(data_path)?; 498 | let metadata = file.metadata()?; 499 | let store_size = metadata.len() as usize; 500 | 501 | Ok(store_size == store_range * E::byte_len()) 502 | } 503 | 504 | pub fn store_size(&self) -> usize { 505 | self.store_size 506 | } 507 | 508 | pub fn store_read_range(&self, start: usize, end: usize) -> Result> { 509 | let read_len = end - start; 510 | let mut read_data = vec![0; read_len]; 511 | 512 | self.file 513 | .read_exact_at(start as u64, &mut read_data) 514 | .with_context(|| { 515 | format!( 516 | "failed to read {} bytes from file at offset {}", 517 | read_len, start 518 | ) 519 | })?; 520 | 521 | ensure!(read_data.len() == read_len, "Failed to read the full range"); 522 | 523 | Ok(read_data) 524 | } 525 | 526 | pub fn store_read_into(&self, start: usize, end: usize, buf: &mut [u8]) -> Result<()> { 527 | self.file 528 | .read_exact_at(start as u64, buf) 529 | .with_context(|| { 530 | format!( 531 | "failed to read {} bytes from file at offset {}", 532 | end - start, 533 | start 534 | ) 535 | })?; 536 | 537 | Ok(()) 538 | } 539 | 540 | pub fn store_copy_from_slice(&mut self, start: usize, slice: &[u8]) -> Result<()> { 541 | ensure!( 542 | start + slice.len() <= self.store_size, 543 | "Requested slice too large (max: {})", 544 | self.store_size 545 | ); 546 | self.file.write_all_at(start as u64, slice)?; 547 | 548 | Ok(()) 549 | } 550 | } 551 | -------------------------------------------------------------------------------- /tests/test_base_constructors.rs: -------------------------------------------------------------------------------- 1 | #![cfg(not(tarpaulin_include))] 2 | pub mod common; 3 | 4 | use rayon::iter::IntoParallelIterator; 5 | use typenum::{Unsigned, U0, U2, U8}; 6 | 7 | use merkletree::hash::Algorithm; 8 | use merkletree::merkle::{ 9 | get_merkle_tree_len_generic, get_merkle_tree_row_count, Element, FromIndexedParallelIterator, 10 | MerkleTree, 11 | }; 12 | use merkletree::store::{ 13 | DiskStore, LevelCacheStore, MmapStore, Store, StoreConfig, VecStore, SMALL_TREE_BUILD, 14 | }; 15 | 16 | use crate::common::{ 17 | generate_vector_of_usizes, instantiate_new, instantiate_new_with_config, 18 | test_disk_mmap_vec_tree_functionality, TestItem, TestItemType, TestSha256Hasher, TestXOR128, 19 | }; 20 | 21 | /// Base tree constructors 22 | fn instantiate_try_from_iter, S: Store, U: Unsigned>( 23 | leaves: usize, 24 | _config: Option, 25 | ) -> MerkleTree { 26 | let dataset = common::generate_vector_of_elements::(leaves); 27 | MerkleTree::try_from_iter(dataset.into_iter().map(Ok)) 28 | .expect("failed to instantiate tree [try_from_iter]") 29 | } 30 | 31 | fn instantiate_from_par_iter, S: Store, U: Unsigned>( 32 | leaves: usize, 33 | _config: Option, 34 | ) -> MerkleTree { 35 | let dataset = common::generate_vector_of_elements::(leaves); 36 | MerkleTree::from_par_iter(dataset.into_par_iter()) 37 | .expect("failed to instantiate tree [try_from_par_iter]") 38 | } 39 | 40 | fn instantiate_try_from_iter_with_config, S: Store, U: Unsigned>( 41 | leaves: usize, 42 | config: Option, 43 | ) -> MerkleTree { 44 | let dataset = common::generate_vector_of_elements::(leaves); 45 | MerkleTree::try_from_iter_with_config( 46 | 
dataset.into_iter().map(Ok), 47 | config.expect("can't get tree's config [try_from_iter_with_config]"), 48 | ) 49 | .expect("failed to instantiate tree [try_from_iter_with_config]") 50 | } 51 | 52 | fn instantiate_from_par_iter_with_config, S: Store, U: Unsigned>( 53 | leaves: usize, 54 | config: Option, 55 | ) -> MerkleTree { 56 | let dataset = common::generate_vector_of_elements::(leaves); 57 | MerkleTree::from_par_iter_with_config( 58 | dataset, 59 | config.expect("can't get tree's config [from_par_iter_with_config]"), 60 | ) 61 | .expect("failed to instantiate tree [from_par_iter_with_config]") 62 | } 63 | 64 | fn instantiate_from_data, S: Store, U: Unsigned>( 65 | leaves: usize, 66 | _config: Option, 67 | ) -> MerkleTree { 68 | let dataset = generate_vector_of_usizes(leaves); 69 | MerkleTree::from_data(dataset.as_slice()).expect("failed to instantiate tree [from_data]") 70 | } 71 | 72 | fn instantiate_from_data_with_config, S: Store, U: Unsigned>( 73 | leaves: usize, 74 | config: Option, 75 | ) -> MerkleTree { 76 | let dataset = generate_vector_of_usizes(leaves); 77 | MerkleTree::from_data_with_config( 78 | dataset.as_slice(), 79 | config.expect("can't get tree's config [from_data_with_config]"), 80 | ) 81 | .expect("failed to instantiate tree [from_data_with_config]") 82 | } 83 | 84 | fn instantiate_from_data_store, S: Store, U: Unsigned>( 85 | leaves: usize, 86 | _config: Option, 87 | ) -> MerkleTree { 88 | let tree = instantiate_from_data::(leaves, None); 89 | let serialized_tree = common::serialize_tree(tree); 90 | let store = Store::new_from_slice(serialized_tree.len(), &serialized_tree) 91 | .expect("can't create new store over existing one [from_data_store]"); 92 | MerkleTree::from_data_store(store, leaves) 93 | .expect("failed to instantiate tree [from_data_store]") 94 | } 95 | 96 | fn instantiate_from_tree_slice, S: Store, U: Unsigned>( 97 | leaves: usize, 98 | _config: Option, 99 | ) -> MerkleTree { 100 | let tree = instantiate_from_data::(leaves, None); 101 | let serialized_tree = common::serialize_tree(tree); 102 | MerkleTree::from_tree_slice(serialized_tree.as_slice(), leaves) 103 | .expect("failed to instantiate tree [from_tree_slice]") 104 | } 105 | 106 | fn instantiate_from_byte_slice, S: Store, U: Unsigned>( 107 | leaves: usize, 108 | _config: Option, 109 | ) -> MerkleTree { 110 | let dataset = common::generate_byte_slice_tree::(leaves); 111 | MerkleTree::from_byte_slice(dataset.as_slice()) 112 | .expect("failed to instantiate tree [from_byte_slice]") 113 | } 114 | 115 | fn instantiate_from_byte_slice_with_config< 116 | E: Element, 117 | A: Algorithm, 118 | S: Store, 119 | U: Unsigned, 120 | >( 121 | leaves: usize, 122 | config: Option, 123 | ) -> MerkleTree { 124 | let dataset = common::generate_byte_slice_tree::(leaves); 125 | MerkleTree::from_byte_slice_with_config( 126 | dataset.as_slice(), 127 | config.expect("from_byte_slice_with_config"), 128 | ) 129 | .expect("failed to instantiate tree [from_byte_slice_with_config]") 130 | } 131 | 132 | fn instantiate_from_tree_slice_with_config< 133 | E: Element, 134 | A: Algorithm, 135 | S: Store, 136 | U: Unsigned, 137 | >( 138 | leaves: usize, 139 | config: Option, 140 | ) -> MerkleTree { 141 | let tmp_tree = instantiate_from_data::(leaves, None); 142 | let serialized_tree = common::serialize_tree(tmp_tree); 143 | MerkleTree::from_tree_slice_with_config( 144 | serialized_tree.as_slice(), 145 | leaves, 146 | config.expect("can't get tree's config [from_tree_slice_with_config]"), 147 | ) 148 | .expect("failed to instantiate 
tree [from_tree_slice_with_config]") 149 | } 150 | 151 | /// Test executor 152 | fn run_test_base_tree, S: Store, BaseTreeArity: Unsigned>( 153 | constructor: fn(usize, Option) -> MerkleTree, 154 | leaves_in_tree: usize, 155 | config: Option, 156 | expected_leaves: usize, 157 | expected_len: usize, 158 | expected_root: E, 159 | ) { 160 | // base tree has SubTreeArity and TopTreeArity parameters equal to zero 161 | let tree: MerkleTree = constructor(leaves_in_tree, config); 162 | test_disk_mmap_vec_tree_functionality(tree, expected_leaves, expected_len, expected_root); 163 | } 164 | 165 | /// Ultimately we have a list of constructors for base trees 166 | /// that we can divide by actual dataset generator and organize 167 | /// complex integration tests that evaluate correct instantiation 168 | /// of base tree (with fixing base tree arity parameter to U8 - oct tree) 169 | /// using distinct hashers 170 | /// 171 | /// [Iterable] 172 | /// - new 173 | /// - try_from_iter 174 | /// - from_par_iter 175 | /// - new_with_config 176 | /// - try_from_iter_with_config 177 | /// - from_par_iter_with_config 178 | /// 179 | /// [Iterable+Hashable, Serialization] 180 | /// - from_data 181 | /// - from_data_with_config 182 | /// - from_data_store 183 | /// - from_tree_slice 184 | /// - from_byte_slice 185 | /// - from_tree_slice_with_config 186 | /// - from_byte_slice_with_config 187 | 188 | #[test] 189 | fn test_iterable() { 190 | fn run_tests, S: Store>(root: E) { 191 | let base_tree_leaves = 64; 192 | let expected_total_leaves = base_tree_leaves; 193 | let len = get_merkle_tree_len_generic::(base_tree_leaves) 194 | .expect("[test_iterable] couldn't compute Merkle Tree len"); 195 | 196 | run_test_base_tree::( 197 | instantiate_new, 198 | base_tree_leaves, 199 | None, 200 | expected_total_leaves, 201 | len, 202 | root, 203 | ); 204 | 205 | run_test_base_tree::( 206 | instantiate_try_from_iter, 207 | base_tree_leaves, 208 | None, 209 | expected_total_leaves, 210 | len, 211 | root, 212 | ); 213 | 214 | run_test_base_tree::( 215 | instantiate_from_par_iter, 216 | base_tree_leaves, 217 | None, 218 | expected_total_leaves, 219 | len, 220 | root, 221 | ); 222 | 223 | let distinguisher = "instantiate_new_with_config"; 224 | let temp_dir = tempfile::Builder::new() 225 | .prefix(distinguisher) 226 | .tempdir() 227 | .expect("[test_iterable] couldn't create temp_dir"); 228 | run_test_base_tree::( 229 | instantiate_new_with_config, 230 | base_tree_leaves, 231 | Some(StoreConfig::new( 232 | temp_dir.into_path(), 233 | String::from(distinguisher), 234 | 0, 235 | )), 236 | expected_total_leaves, 237 | len, 238 | root, 239 | ); 240 | 241 | let distinguisher = "instantiate_try_from_iter_with_config"; 242 | let temp_dir = tempfile::Builder::new() 243 | .prefix(distinguisher) 244 | .tempdir() 245 | .expect("[test_iterable] couldn't create temp_dir"); 246 | run_test_base_tree::( 247 | instantiate_try_from_iter_with_config, 248 | base_tree_leaves, 249 | Some(StoreConfig::new( 250 | temp_dir.into_path(), 251 | String::from(distinguisher), 252 | 0, 253 | )), 254 | expected_total_leaves, 255 | len, 256 | root, 257 | ); 258 | 259 | let distinguisher = "instantiate_from_par_iter_with_config"; 260 | let temp_dir = tempfile::Builder::new() 261 | .prefix(distinguisher) 262 | .tempdir() 263 | .expect("[test_iterable] couldn't create temp_dir"); 264 | run_test_base_tree::( 265 | instantiate_from_par_iter_with_config, 266 | base_tree_leaves, 267 | Some(StoreConfig::new( 268 | temp_dir.into_path(), 269 | String::from(distinguisher), 
270 | 0, 271 | )), 272 | expected_total_leaves, 273 | len, 274 | root, 275 | ); 276 | } 277 | 278 | // Run set of tests over XOR128-based hasher 279 | let root_xor128 = 280 | TestItemType::from_slice(&[65, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0]); 281 | run_tests::>(root_xor128); 282 | run_tests::>(root_xor128); 283 | run_tests::>(root_xor128); 284 | 285 | // Run set of tests over SHA256-based hasher 286 | let root_sha256 = TestItem::from_slice(&[ 287 | 252, 61, 163, 229, 140, 223, 198, 165, 200, 137, 59, 43, 83, 136, 197, 63, 288 | ]); 289 | run_tests::>(root_sha256); 290 | run_tests::>(root_sha256); 291 | run_tests::>(root_sha256); 292 | } 293 | 294 | #[test] 295 | fn test_iterable_hashable_and_serialization() { 296 | fn run_tests, S: Store>(root: E) { 297 | let base_tree_leaves = 64; 298 | let expected_total_leaves = base_tree_leaves; 299 | let len = get_merkle_tree_len_generic::(base_tree_leaves) 300 | .expect("[test_iterable_hashable_and_serialization] couldn't compute Merkle Tree len"); 301 | 302 | run_test_base_tree::( 303 | instantiate_from_data, 304 | base_tree_leaves, 305 | None, 306 | expected_total_leaves, 307 | len, 308 | root, 309 | ); 310 | 311 | let distinguisher = "instantiate_from_data_with_config"; 312 | let temp_dir = tempfile::Builder::new() 313 | .prefix(distinguisher) 314 | .tempdir() 315 | .expect("[test_iterable_hashable_and_serialization] couldn't create temp_dir"); 316 | run_test_base_tree::( 317 | instantiate_from_data_with_config, 318 | base_tree_leaves, 319 | Some(StoreConfig::new( 320 | temp_dir.into_path(), 321 | String::from(distinguisher), 322 | 0, 323 | )), 324 | expected_total_leaves, 325 | len, 326 | root, 327 | ); 328 | 329 | run_test_base_tree::( 330 | instantiate_from_data_store, 331 | base_tree_leaves, 332 | None, 333 | expected_total_leaves, 334 | len, 335 | root, 336 | ); 337 | 338 | run_test_base_tree::( 339 | instantiate_from_tree_slice, 340 | base_tree_leaves, 341 | None, 342 | expected_total_leaves, 343 | len, 344 | root, 345 | ); 346 | 347 | run_test_base_tree::( 348 | instantiate_from_byte_slice, 349 | base_tree_leaves, 350 | None, 351 | expected_total_leaves, 352 | len, 353 | root, 354 | ); 355 | 356 | let distinguisher = "instantiate_from_byte_slice_with_config"; 357 | let temp_dir = tempfile::Builder::new() 358 | .prefix(distinguisher) 359 | .tempdir() 360 | .expect("[test_iterable_hashable_and_serialization] couldn't create temp_dir"); 361 | run_test_base_tree::( 362 | instantiate_from_byte_slice_with_config, 363 | base_tree_leaves, 364 | Some(StoreConfig::new( 365 | temp_dir.into_path(), 366 | String::from(distinguisher), 367 | 0, 368 | )), 369 | expected_total_leaves, 370 | len, 371 | root, 372 | ); 373 | 374 | let distinguisher = "instantiate_from_tree_slice_with_config"; 375 | let temp_dir = tempfile::Builder::new() 376 | .prefix(distinguisher) 377 | .tempdir() 378 | .expect("[test_iterable_hashable_and_serialization] couldn't create temp_dir"); 379 | run_test_base_tree::( 380 | instantiate_from_tree_slice_with_config, 381 | base_tree_leaves, 382 | Some(StoreConfig::new( 383 | temp_dir.into_path(), 384 | String::from(distinguisher), 385 | 0, 386 | )), 387 | expected_total_leaves, 388 | len, 389 | root, 390 | ); 391 | } 392 | 393 | // Run set of tests over XOR128-based hasher 394 | let root_xor128 = TestItemType::from_slice(&[1, 0, 0, 0, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]); 395 | run_tests::>(root_xor128); 396 | run_tests::>(root_xor128); 397 | run_tests::>(root_xor128); 398 | 399 | // Run set of tests over SHA256-based 
hasher 400 | let root_sha256 = TestItem::from_slice(&[ 401 | 98, 103, 202, 101, 121, 179, 6, 237, 133, 39, 253, 169, 173, 63, 89, 188, 402 | ]); 403 | run_tests::>(root_sha256); 404 | run_tests::>(root_sha256); 405 | run_tests::>(root_sha256); 406 | } 407 | 408 | /// Test executor 409 | /// 410 | /// Logically this test only checks that created tree has expected storage. 411 | /// Since Rust doesn't provide special tools for comparing types (only some unstable tools 412 | /// exist - https://stackoverflow.com/questions/60138397/how-to-test-for-type-equality-in-rust) 413 | /// we just compare partial strings from formatted storages 414 | fn run_base_tree_storage_test, S: Store, BaseTreeArity: Unsigned>( 415 | constructor: fn(usize, Option) -> MerkleTree, 416 | leaves_in_tree: usize, 417 | config: Option, 418 | expected_storage: S, 419 | ) { 420 | // it should be enough for our current storages 421 | const SYMBOLS_TO_TRUNCATE: usize = 5; 422 | 423 | let tree = constructor(leaves_in_tree, config); 424 | let actual_storage = tree.data().expect("can't get type of tree's storage"); 425 | 426 | let mut expected = format!("{:?}", expected_storage); 427 | let mut actual = format!("{:?}", actual_storage); 428 | 429 | expected.truncate(SYMBOLS_TO_TRUNCATE); 430 | actual.truncate(SYMBOLS_TO_TRUNCATE); 431 | 432 | assert_eq!(expected, actual); 433 | } 434 | 435 | /// This integration test evaluates that base tree of any storage (Disk, LevelCache and Mmap; 436 | /// we don't check here VecStore as it is already evaluated in previous test) can be correctly 437 | /// instantiated with expected data storage type 438 | /// 439 | #[test] 440 | fn test_storage_types() { 441 | let base_tree_leaves = 64; 442 | let expected_total_leaves = base_tree_leaves; 443 | let branches = 8; 444 | 445 | // Disk 446 | type DiskStorage = DiskStore; 447 | let distinguisher = "instantiate_new_with_config-disk"; 448 | let temp_dir = tempfile::Builder::new() 449 | .prefix(distinguisher) 450 | .tempdir() 451 | .expect("[test_storage_types] couldn't create temp_dir"); 452 | run_base_tree_storage_test::( 453 | instantiate_new_with_config, 454 | base_tree_leaves, 455 | Some(StoreConfig::new( 456 | temp_dir.into_path(), 457 | String::from(distinguisher), 458 | StoreConfig::default_rows_to_discard(expected_total_leaves, branches), 459 | )), 460 | DiskStorage::new(1).expect("[test_storage_types] couldn't create DiskStorage"), 461 | ); 462 | 463 | // Mmap 464 | type MmapStorage = MmapStore; 465 | let distinguisher = "instantiate_new_with_config-mmap"; 466 | let temp_dir = tempfile::Builder::new() 467 | .prefix(distinguisher) 468 | .tempdir() 469 | .expect("[test_storage_types] couldn't create temp_dir"); 470 | run_base_tree_storage_test::( 471 | instantiate_new_with_config, 472 | base_tree_leaves, 473 | Some(StoreConfig::new( 474 | temp_dir.into_path(), 475 | String::from(distinguisher), 476 | StoreConfig::default_rows_to_discard(expected_total_leaves, branches), 477 | )), 478 | MmapStorage::new(1).expect("[test_storage_types] couldn't instantiate MmapStorage"), 479 | ); 480 | 481 | // Level-cache 482 | type LevelCacheStorage = LevelCacheStore; 483 | let distinguisher = "instantiate_new_with_config-level-cache"; 484 | let temp_dir = tempfile::Builder::new() 485 | .prefix(distinguisher) 486 | .tempdir() 487 | .expect("[test_storage_types] couldn't create temp_dir"); 488 | run_base_tree_storage_test::( 489 | instantiate_new_with_config, 490 | base_tree_leaves, 491 | Some(StoreConfig::new( 492 | temp_dir.into_path(), 493 | 
String::from(distinguisher), 494 | StoreConfig::default_rows_to_discard(expected_total_leaves, branches), 495 | )), 496 | LevelCacheStorage::new(1) 497 | .expect("[test_storage_types] couldn't instantiate LevelCacheStorage"), 498 | ); 499 | } 500 | 501 | // big test moved from test_xor128.rs 502 | #[test] 503 | #[ignore] 504 | fn test_large_base_trees() { 505 | fn run_test( 506 | leaves: usize, 507 | len: usize, 508 | row_count: usize, 509 | num_challenges: usize, 510 | ) { 511 | let big_tree = 512 | instantiate_new::, BaseTreeArity>( 513 | leaves, None, 514 | ); 515 | 516 | assert_eq!(big_tree.row_count(), row_count); 517 | assert_eq!(big_tree.len(), len); 518 | 519 | // Selectively verify that proving works. 520 | for i in 0..num_challenges { 521 | let index = i * (leaves / num_challenges); 522 | let proof = big_tree.gen_proof(index).expect("Failed to generate proof"); 523 | assert!(proof 524 | .validate::() 525 | .expect("failed to validate proof")); 526 | } 527 | } 528 | 529 | let (leaves, len, row_count, num_challenges) = { (16777216, 19173961, 9, 1024) }; 530 | run_test::(leaves, len, row_count, num_challenges); 531 | 532 | let leaves = SMALL_TREE_BUILD * 2; 533 | let num_challenges = SMALL_TREE_BUILD * 2; 534 | let branches = 2; 535 | run_test::( 536 | leaves, 537 | get_merkle_tree_len_generic::(leaves).expect("can't get tree len"), 538 | get_merkle_tree_row_count(leaves, branches), 539 | num_challenges, 540 | ); 541 | } 542 | -------------------------------------------------------------------------------- /src/store/mod.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::fs::OpenOptions; 3 | use std::io::Read; 4 | use std::iter::FromIterator; 5 | use std::ops; 6 | use std::path::{Path, PathBuf}; 7 | use std::sync::{Arc, RwLock}; 8 | 9 | use anyhow::Result; 10 | use positioned_io::ReadAt; 11 | use rayon::iter::plumbing::*; 12 | use rayon::iter::*; 13 | use rayon::prelude::*; 14 | use serde::{Deserialize, Serialize}; 15 | use typenum::marker_traits::Unsigned; 16 | 17 | use crate::hash::Algorithm; 18 | use crate::merkle::{get_merkle_tree_row_count, log2_pow2, next_pow2, Element}; 19 | 20 | /// Tree size (number of nodes) used as threshold to decide which build algorithm 21 | /// to use. Small trees (below this value) use the old build algorithm, optimized 22 | /// for speed rather than memory, allocating as much as needed to allow multiple 23 | /// threads to work concurrently without interrupting each other. Large trees (above) 24 | /// use the new build algorithm, optimized for memory rather than speed, allocating 25 | /// as less as possible with multiple threads competing to get the write lock. 26 | pub const SMALL_TREE_BUILD: usize = 1024; 27 | 28 | // Number of nodes to process in parallel during the `build` stage. 
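// This constant must stay divisible by every supported arity: process_layer
// checks `BUILD_CHUNK_NODES % branches == 0` before splitting a level into
// chunks.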
29 | pub const BUILD_CHUNK_NODES: usize = 1024 * 4; 30 | 31 | mod disk; 32 | mod level_cache; 33 | mod mmap; 34 | mod vec; 35 | 36 | pub use disk::DiskStore; 37 | pub use level_cache::LevelCacheStore; 38 | pub use mmap::MmapStore; 39 | pub use vec::VecStore; 40 | 41 | #[derive(Clone)] 42 | pub struct ExternalReader { 43 | pub offset: usize, 44 | pub source: R, 45 | pub read_fn: fn(start: usize, end: usize, buf: &mut [u8], source: &R) -> Result, 46 | } 47 | 48 | impl ExternalReader { 49 | pub fn read(&self, start: usize, end: usize, buf: &mut [u8]) -> Result { 50 | (self.read_fn)(start + self.offset, end + self.offset, buf, &self.source) 51 | } 52 | } 53 | 54 | impl ExternalReader { 55 | pub fn new_from_config(replica_config: &ReplicaConfig, index: usize) -> Result { 56 | let reader = OpenOptions::new().read(true).open(&replica_config.path)?; 57 | 58 | Ok(ExternalReader { 59 | offset: replica_config.offsets[index], 60 | source: reader, 61 | read_fn: |start, end, buf: &mut [u8], reader: &std::fs::File| { 62 | reader.read_exact_at(start as u64, &mut buf[0..end - start])?; 63 | 64 | Ok(end - start) 65 | }, 66 | }) 67 | } 68 | 69 | pub fn new_from_path(path: &Path) -> Result { 70 | Self::new_from_config(&ReplicaConfig::from(path), 0) 71 | } 72 | } 73 | 74 | impl fmt::Debug for ExternalReader { 75 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 76 | f.debug_struct("ExternalReader") 77 | .field("source: Read + Send + Sync", &1i32) 78 | .field( 79 | "read_fn: callback(start: usize, end: usize, buf: &mut [u8])", 80 | &2i32, 81 | ) 82 | .finish() 83 | } 84 | } 85 | 86 | // Version 1 always contained the base layer data (even after 'compact'). 87 | // Version 2 no longer contains the base layer data after compact. 88 | #[derive(Clone, Copy, Debug)] 89 | pub enum StoreConfigDataVersion { 90 | One = 1, 91 | Two = 2, 92 | } 93 | 94 | const DEFAULT_STORE_CONFIG_DATA_VERSION: u32 = StoreConfigDataVersion::Two as u32; 95 | 96 | #[derive(Clone, Debug, Serialize, Deserialize, Default)] 97 | pub struct ReplicaConfig { 98 | pub path: PathBuf, 99 | pub offsets: Vec, 100 | } 101 | 102 | impl ReplicaConfig { 103 | pub fn new>(path: T, offsets: Vec) -> Self { 104 | ReplicaConfig { 105 | path: path.into(), 106 | offsets, 107 | } 108 | } 109 | } 110 | 111 | impl From<&Path> for ReplicaConfig { 112 | fn from(path: &Path) -> Self { 113 | ReplicaConfig { 114 | path: path.to_owned(), 115 | offsets: vec![0], 116 | } 117 | } 118 | } 119 | 120 | #[derive(Clone, Debug, Serialize, Deserialize, Default)] 121 | pub struct StoreConfig { 122 | /// A directory in which data (a merkle tree) can be persisted. 123 | pub path: PathBuf, 124 | 125 | /// A unique identifier used to help specify the on-disk store 126 | /// location for this particular data. 127 | pub id: String, 128 | 129 | /// The number of elements in the DiskStore. This field is 130 | /// optional, and unused internally. 131 | pub size: Option, 132 | 133 | /// The number of merkle tree rows_to_discard then cache on disk. 134 | pub rows_to_discard: usize, 135 | } 136 | 137 | impl StoreConfig { 138 | pub fn new, S: Into>(path: T, id: S, rows_to_discard: usize) -> Self { 139 | StoreConfig { 140 | path: path.into(), 141 | id: id.into(), 142 | size: None, 143 | rows_to_discard, 144 | } 145 | } 146 | 147 | // If the tree is large enough to use the default value 148 | // (per-arity), use it. If it's too small to cache anything 149 | // (i.e. not enough rows), don't discard any. 
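// Worked example: a binary tree over 2^20 leaves has row_count = 21, so
// max_rows_to_discard = 21 - 2 = 19 and this function returns min(19, 7) = 7.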
150 | pub fn default_rows_to_discard(leafs: usize, branches: usize) -> usize { 151 | let row_count = get_merkle_tree_row_count(leafs, branches); 152 | if row_count <= 2 { 153 | // If a tree only has a root row and/or base, there is 154 | // nothing to discard. 155 | return 0; 156 | } else if row_count == 3 { 157 | // If a tree only has 1 row between the base and root, 158 | // it's all that can be discarded. 159 | return 1; 160 | } 161 | 162 | // row_count - 2 discounts the base layer (1) and root (1) 163 | let max_rows_to_discard = row_count - 2; 164 | 165 | // Discard at most 'constant value' rows (coded below, 166 | // differing by arity) while respecting the max number that 167 | // the tree can support discarding. 168 | match branches { 169 | 2 => std::cmp::min(max_rows_to_discard, 7), 170 | 4 => std::cmp::min(max_rows_to_discard, 5), 171 | _ => std::cmp::min(max_rows_to_discard, 2), 172 | } 173 | } 174 | 175 | // Deterministically create the data_path on-disk location from a 176 | // path and specified id. 177 | pub fn data_path(path: &Path, id: &str) -> PathBuf { 178 | Path::new(&path).join(format!( 179 | "sc-{:0>2}-data-{}.dat", 180 | DEFAULT_STORE_CONFIG_DATA_VERSION, id 181 | )) 182 | } 183 | 184 | pub fn from_config>(config: &StoreConfig, id: S, size: Option) -> Self { 185 | let val = if let Some(size) = size { 186 | Some(size) 187 | } else { 188 | config.size 189 | }; 190 | 191 | StoreConfig { 192 | path: config.path.clone(), 193 | id: id.into(), 194 | size: val, 195 | rows_to_discard: config.rows_to_discard, 196 | } 197 | } 198 | } 199 | 200 | /// Backing store of the merkle tree. 201 | pub trait Store: std::fmt::Debug + Send + Sync + Sized { 202 | /// Creates a new store which can store up to `size` elements. 203 | fn new_with_config(size: usize, branches: usize, config: StoreConfig) -> Result; 204 | fn new(size: usize) -> Result; 205 | 206 | fn new_from_slice_with_config( 207 | size: usize, 208 | branches: usize, 209 | data: &[u8], 210 | config: StoreConfig, 211 | ) -> Result; 212 | 213 | fn new_from_slice(size: usize, data: &[u8]) -> Result; 214 | 215 | /// This constructor is used for instantiating stores ONLY from existing (potentially read-only) files 216 | fn new_from_disk(size: usize, branches: usize, config: &StoreConfig) -> Result; 217 | 218 | fn write_at(&mut self, el: E, index: usize) -> Result<()>; 219 | 220 | // Used to reduce lock contention and do the `E` to `u8` 221 | // conversion in `build` *outside* the lock. 222 | // `buf` is a slice of converted `E`s and `start` is its 223 | // position in `E` sizes (*not* in `u8`). 224 | fn copy_from_slice(&mut self, buf: &[u8], start: usize) -> Result<()>; 225 | 226 | // compact/shrink resources used where possible. 227 | fn compact(&mut self, branches: usize, config: StoreConfig, store_version: u32) 228 | -> Result; 229 | 230 | // re-instate resource usage where needed. 231 | fn reinit(&mut self) -> Result<()> { 232 | Ok(()) 233 | } 234 | 235 | // Removes the store backing (does not require a mutable reference 236 | // since the config should provide stateless context to what's 237 | // needed to be removed -- with the exception of in memory stores, 238 | // where this is arguably not important/needed). 
239 | fn delete(config: StoreConfig) -> Result<()>; 240 | 241 | fn read_at(&self, index: usize) -> Result; 242 | fn read_range(&self, r: ops::Range) -> Result>; 243 | fn read_into(&self, pos: usize, buf: &mut [u8]) -> Result<()>; 244 | fn read_range_into(&self, start: usize, end: usize, buf: &mut [u8]) -> Result<()>; 245 | 246 | fn len(&self) -> usize; 247 | fn loaded_from_disk(&self) -> bool; 248 | fn is_empty(&self) -> bool; 249 | fn push(&mut self, el: E) -> Result<()>; 250 | fn last(&self) -> Result { 251 | self.read_at(self.len() - 1) 252 | } 253 | 254 | // Sync contents to disk (if it exists). This function is used to avoid 255 | // unnecessary flush calls at the cost of added code complexity. 256 | fn sync(&self) -> Result<()> { 257 | Ok(()) 258 | } 259 | 260 | #[inline] 261 | fn build_small_tree, U: Unsigned>( 262 | &mut self, 263 | leafs: usize, 264 | row_count: usize, 265 | ) -> Result { 266 | ensure!(leafs % 2 == 0, "Leafs must be a power of two"); 267 | 268 | let mut level: usize = 0; 269 | let mut width = leafs; 270 | let mut level_node_index = 0; 271 | let branches = U::to_usize(); 272 | let shift = log2_pow2(branches); 273 | 274 | while width > 1 { 275 | // Same indexing logic as `build`. 276 | let (layer, write_start) = { 277 | let (read_start, write_start) = if level == 0 { 278 | // Note that we previously asserted that data.len() == leafs. 279 | (0, Store::len(self)) 280 | } else { 281 | (level_node_index, level_node_index + width) 282 | }; 283 | 284 | let layer: Vec<_> = self 285 | .read_range(read_start..read_start + width)? 286 | .par_chunks(branches) 287 | .map(|nodes| A::default().multi_node(nodes, level)) 288 | .collect(); 289 | 290 | (layer, write_start) 291 | }; 292 | 293 | for (i, node) in layer.into_iter().enumerate() { 294 | self.write_at(node, write_start + i)?; 295 | } 296 | 297 | level_node_index += width; 298 | level += 1; 299 | width >>= shift; // width /= branches; 300 | } 301 | 302 | ensure!(row_count == level + 1, "Invalid tree row_count"); 303 | // The root isn't part of the previous loop so `row_count` is 304 | // missing one level. 305 | 306 | self.last() 307 | } 308 | 309 | fn process_layer, U: Unsigned>( 310 | &mut self, 311 | width: usize, 312 | level: usize, 313 | read_start: usize, 314 | write_start: usize, 315 | ) -> Result<()> { 316 | let branches = U::to_usize(); 317 | let data_lock = Arc::new(RwLock::new(self)); 318 | 319 | // Allocate `width` indexes during operation (which is a negligible memory bloat 320 | // compared to the 32-bytes size of the nodes stored in the `Store`s) and hash each 321 | // pair of nodes to write them to the next level in concurrent threads. 322 | // Process `BUILD_CHUNK_NODES` nodes in each thread at a time to reduce contention, 323 | // optimized for big sector sizes (small ones will just have one thread doing all 324 | // the work). 325 | ensure!(BUILD_CHUNK_NODES % branches == 0, "Invalid chunk size"); 326 | Vec::from_iter((read_start..read_start + width).step_by(BUILD_CHUNK_NODES)) 327 | .par_iter() 328 | .try_for_each(|&chunk_index| -> Result<()> { 329 | let chunk_size = std::cmp::min(BUILD_CHUNK_NODES, read_start + width - chunk_index); 330 | 331 | let chunk_nodes = { 332 | // Read everything taking the lock once. 333 | data_lock 334 | .read() 335 | .expect("[process_layer] couldn't block current thread") 336 | .read_range(chunk_index..chunk_index + chunk_size)? 
337 | }; 338 | 339 | // We write the hashed nodes to the next level in the 340 | // position that would be "in the middle" of the 341 | // previous pair (dividing by branches). 342 | let write_delta = (chunk_index - read_start) / branches; 343 | 344 | let nodes_size = (chunk_nodes.len() / branches) * E::byte_len(); 345 | let hashed_nodes_as_bytes = chunk_nodes.chunks(branches).fold( 346 | Vec::with_capacity(nodes_size), 347 | |mut acc, nodes| { 348 | let h = A::default().multi_node(nodes, level); 349 | acc.extend_from_slice(h.as_ref()); 350 | acc 351 | }, 352 | ); 353 | 354 | // Check that we correctly pre-allocated the space. 355 | ensure!( 356 | hashed_nodes_as_bytes.len() == chunk_size / branches * E::byte_len(), 357 | "Invalid hashed node length" 358 | ); 359 | 360 | // Write the data into the store. 361 | data_lock 362 | .write() 363 | .expect("[process_layer] couldn't block current thread") 364 | .copy_from_slice(&hashed_nodes_as_bytes, write_start + write_delta) 365 | }) 366 | } 367 | 368 | // Default merkle-tree build, based on store type. 369 | fn build, U: Unsigned>( 370 | &mut self, 371 | leafs: usize, 372 | row_count: usize, 373 | _config: Option, 374 | ) -> Result { 375 | let branches = U::to_usize(); 376 | ensure!( 377 | next_pow2(branches) == branches, 378 | "branches MUST be a power of 2" 379 | ); 380 | ensure!(Store::len(self) == leafs, "Inconsistent data"); 381 | ensure!(leafs % 2 == 0, "Leafs must be a power of two"); 382 | 383 | if leafs <= SMALL_TREE_BUILD { 384 | return self.build_small_tree::(leafs, row_count); 385 | } 386 | 387 | let shift = log2_pow2(branches); 388 | 389 | // Process one `level` at a time of `width` nodes. Each level has half the nodes 390 | // as the previous one; the first level, completely stored in `data`, has `leafs` 391 | // nodes. We guarantee an even number of nodes per `level`, duplicating the last 392 | // node if necessary. 393 | let mut level: usize = 0; 394 | let mut width = leafs; 395 | let mut level_node_index = 0; 396 | while width > 1 { 397 | // Start reading at the beginning of the current level, and writing the next 398 | // level immediate after. `level_node_index` keeps track of the current read 399 | // starts, and width is updated accordingly at each level so that we know where 400 | // to start writing. 401 | let (read_start, write_start) = if level == 0 { 402 | // Note that we previously asserted that data.len() == leafs. 403 | //(0, data_lock.read().unwrap().len()) 404 | (0, Store::len(self)) 405 | } else { 406 | (level_node_index, level_node_index + width) 407 | }; 408 | 409 | self.process_layer::(width, level, read_start, write_start)?; 410 | 411 | level_node_index += width; 412 | level += 1; 413 | width >>= shift; // width /= branches; 414 | } 415 | 416 | ensure!(row_count == level + 1, "Invalid tree row_count"); 417 | // The root isn't part of the previous loop so `row_count` is 418 | // missing one level. 419 | 420 | // Return the root 421 | self.last() 422 | } 423 | } 424 | 425 | // Using a macro as it is not possible to do a generic implementation for all stores. 426 | 427 | macro_rules! 
impl_parallel_iter { 428 | ($name:ident, $producer:ident, $iter:ident) => { 429 | impl ParallelIterator for $name { 430 | type Item = E; 431 | 432 | fn drive_unindexed(self, consumer: C) -> C::Result 433 | where 434 | C: UnindexedConsumer, 435 | { 436 | bridge(self, consumer) 437 | } 438 | 439 | fn opt_len(&self) -> Option { 440 | Some(Store::len(self)) 441 | } 442 | } 443 | impl<'a, E: Element> ParallelIterator for &'a $name { 444 | type Item = E; 445 | 446 | fn drive_unindexed(self, consumer: C) -> C::Result 447 | where 448 | C: UnindexedConsumer, 449 | { 450 | bridge(self, consumer) 451 | } 452 | 453 | fn opt_len(&self) -> Option { 454 | Some(Store::len(*self)) 455 | } 456 | } 457 | 458 | impl IndexedParallelIterator for $name { 459 | fn drive(self, consumer: C) -> C::Result 460 | where 461 | C: Consumer, 462 | { 463 | bridge(self, consumer) 464 | } 465 | 466 | fn len(&self) -> usize { 467 | Store::len(self) 468 | } 469 | 470 | fn with_producer(self, callback: CB) -> CB::Output 471 | where 472 | CB: ProducerCallback, 473 | { 474 | callback.callback(<$producer>::new(0, Store::len(&self), &self)) 475 | } 476 | } 477 | 478 | impl<'a, E: Element> IndexedParallelIterator for &'a $name { 479 | fn drive(self, consumer: C) -> C::Result 480 | where 481 | C: Consumer, 482 | { 483 | bridge(self, consumer) 484 | } 485 | 486 | fn len(&self) -> usize { 487 | Store::len(*self) 488 | } 489 | 490 | fn with_producer(self, callback: CB) -> CB::Output 491 | where 492 | CB: ProducerCallback, 493 | { 494 | callback.callback(<$producer>::new(0, Store::len(self), self)) 495 | } 496 | } 497 | 498 | #[derive(Debug, Clone)] 499 | pub struct $producer<'data, E: Element> { 500 | pub(crate) current: usize, 501 | pub(crate) end: usize, 502 | pub(crate) store: &'data $name, 503 | } 504 | 505 | impl<'data, E: 'data + Element> $producer<'data, E> { 506 | pub fn new(current: usize, end: usize, store: &'data $name) -> Self { 507 | Self { 508 | current, 509 | end, 510 | store, 511 | } 512 | } 513 | 514 | pub fn len(&self) -> usize { 515 | self.end - self.current 516 | } 517 | 518 | pub fn is_empty(&self) -> bool { 519 | self.len() == 0 520 | } 521 | } 522 | 523 | impl<'data, E: 'data + Element> Producer for $producer<'data, E> { 524 | type Item = E; 525 | type IntoIter = $iter<'data, E>; 526 | 527 | fn into_iter(self) -> Self::IntoIter { 528 | let $producer { 529 | current, 530 | end, 531 | store, 532 | } = self; 533 | 534 | $iter { 535 | current, 536 | end, 537 | store, 538 | err: false, 539 | } 540 | } 541 | 542 | fn split_at(self, index: usize) -> (Self, Self) { 543 | let len = self.len(); 544 | 545 | if len == 0 { 546 | return ( 547 | <$producer>::new(0, 0, &self.store), 548 | <$producer>::new(0, 0, &self.store), 549 | ); 550 | } 551 | 552 | let current = self.current; 553 | let first_end = current + std::cmp::min(len, index); 554 | 555 | debug_assert!(first_end >= current); 556 | debug_assert!(current + len >= first_end); 557 | 558 | ( 559 | <$producer>::new(current, first_end, &self.store), 560 | <$producer>::new(first_end, current + len, &self.store), 561 | ) 562 | } 563 | } 564 | #[derive(Debug)] 565 | pub struct $iter<'data, E: Element> { 566 | current: usize, 567 | end: usize, 568 | err: bool, 569 | store: &'data $name, 570 | } 571 | 572 | impl<'data, E: 'data + Element> $iter<'data, E> { 573 | fn is_done(&self) -> bool { 574 | !self.err && self.len() == 0 575 | } 576 | } 577 | 578 | impl<'data, E: 'data + Element> Iterator for $iter<'data, E> { 579 | type Item = E; 580 | 581 | fn next(&mut self) -> Option { 582 
| if self.is_done() {
583 |                     return None;
584 |                 }
585 | 
586 |                 match self.store.read_at(self.current) {
587 |                     Ok(el) => {
588 |                         self.current += 1;
589 |                         Some(el)
590 |                     }
591 |                     _ => {
592 |                         self.err = true;
593 |                         None
594 |                     }
595 |                 }
596 |             }
597 |         }
598 | 
599 |         impl<'data, E: 'data + Element> ExactSizeIterator for $iter<'data, E> {
600 |             fn len(&self) -> usize {
601 |                 debug_assert!(self.current <= self.end);
602 |                 self.end - self.current
603 |             }
604 |         }
605 | 
606 |         impl<'data, E: 'data + Element> DoubleEndedIterator for $iter<'data, E> {
607 |             fn next_back(&mut self) -> Option<Self::Item> {
608 |                 if self.is_done() {
609 |                     return None;
610 |                 }
611 | 
612 |                 match self.store.read_at(self.end - 1) {
613 |                     Ok(el) => {
614 |                         self.end -= 1;
615 |                         Some(el)
616 |                     }
617 |                     _ => {
618 |                         self.err = true;
619 |                         None
620 |                     }
621 |                 }
622 |             }
623 |         }
624 |     };
625 | }
626 | 
627 | impl_parallel_iter!(VecStore, VecStoreProducer, VecStoreIter);
628 | impl_parallel_iter!(DiskStore, DiskStoreProducer, DiskIter);
629 | //impl_parallel_iter!(LevelCacheStore, LevelCacheStoreProducer, LevelCacheIter);
630 | 
--------------------------------------------------------------------------------
/src/proof.rs:
--------------------------------------------------------------------------------
1 | use anyhow::Result;
2 | use std::marker::PhantomData;
3 | use typenum::marker_traits::Unsigned;
4 | use typenum::U2;
5 | 
6 | use crate::hash::{Algorithm, Hashable};
7 | use crate::merkle::get_merkle_proof_lemma_len;
8 | 
9 | /// Merkle tree inclusion proof for data element, for which item = Leaf(Hash(Data Item)).
10 | ///
11 | /// Lemma layout:
12 | ///
13 | /// ```text
14 | /// [ item h1x h2y h3z ... root ]
15 | /// ```
16 | ///
17 | /// Proof validation hashes the item up along the positions given by the path,
18 | /// combining it with the lemma entries, and checks the result against the root hash.
19 | #[derive(Debug, Clone, Eq, PartialEq)]
20 | /// U is the default tree arity (U2 = binary)
21 | pub struct Proof<T: Eq + Clone + AsRef<[u8]>, BaseTreeArity: Unsigned = U2> {
22 |     // Optional proofs at immediate lower level from current. Should
23 |     // be None at the base layer.
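    // Editor's note (hedged sanity check of the layout above, assuming
    // get_merkle_proof_lemma_len counts the item, (branches - 1) sibling
    // hashes per level, and the root): a binary tree with 8 leaves has
    // row_count = 4, so a base-layer proof carries path.len() = 3 branch
    // indices and a lemma of 1 + 3 * (2 - 1) + 1 = 5 hashes.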
23 | pub sub_tree_proof: Option>>, 24 | 25 | top_layer_nodes: usize, // arity of top layer 26 | sub_tree_layer_nodes: usize, // arity of sub-tree layer 27 | 28 | lemma: Vec, 29 | path: Vec, // branch index 30 | 31 | _u: PhantomData, // number of branches per node 32 | } 33 | 34 | impl, BaseTreeArity: Unsigned> Proof { 35 | /// Creates new MT inclusion proof 36 | pub fn new( 37 | sub_tree_proof: Option>>, 38 | lemma: Vec, 39 | path: Vec, 40 | ) -> Result> { 41 | if TopLayerArity::to_usize() == 0 && SubTreeArity::to_usize() == 0 { 42 | ensure!(lemma.len() > 2, "Invalid lemma length (short)"); 43 | ensure!( 44 | lemma.len() 45 | == get_merkle_proof_lemma_len(path.len() + 1, BaseTreeArity::to_usize()), 46 | "Invalid lemma length" 47 | ); 48 | } 49 | 50 | Ok(Proof { 51 | sub_tree_proof, 52 | top_layer_nodes: TopLayerArity::to_usize(), 53 | sub_tree_layer_nodes: SubTreeArity::to_usize(), 54 | lemma, 55 | path, 56 | 57 | _u: PhantomData, 58 | }) 59 | } 60 | 61 | /// Return proof target leaf 62 | pub fn item(&self) -> T { 63 | self.lemma 64 | .first() 65 | .expect("[item] requested element is empty") 66 | .clone() 67 | } 68 | 69 | /// Return sub tree root 70 | pub fn sub_tree_root(&self) -> T { 71 | assert!(self.sub_tree_layer_nodes > 0 && self.sub_tree_proof.is_some()); 72 | // unwrap is safe as we checked sub_tree_proof to be initialised 73 | self.sub_tree_proof.as_ref().unwrap().root() 74 | } 75 | 76 | /// Return tree root 77 | pub fn root(&self) -> T { 78 | self.lemma 79 | .last() 80 | .expect("[root] requested element is empty") 81 | .clone() 82 | } 83 | 84 | /// Validates sub-tree proofs with the specified arity. 85 | fn validate_sub_tree_proof>(&self, arity: usize) -> Result { 86 | // Ensure that the sub_tree validates to the root of that 87 | // sub_tree. 88 | let valid = self 89 | .sub_tree_proof 90 | .as_ref() 91 | .expect("[validate_sub_tree_proof] couldn't get sub_tree_proof value") 92 | .validate::()?; 93 | if !valid { 94 | return Ok(valid); 95 | } 96 | 97 | // Validate top-most/current layer 98 | // 99 | // Check that the remaining proof matches the tree root (note 100 | // that Proof::validate at the base layer cannot handle a 101 | // proof this small, so this is a version specific for what we 102 | // know we have in this case). 103 | let mut a = A::default(); 104 | a.reset(); 105 | let node_count = arity; 106 | let h = { 107 | let mut nodes: Vec = Vec::with_capacity(node_count); 108 | let mut cur_index = 0; 109 | for j in 0..node_count { 110 | if j == self.path()[0] { 111 | nodes.push(self.sub_tree_root().clone()); 112 | } else { 113 | nodes.push(self.lemma()[cur_index].clone()); 114 | cur_index += 1; 115 | } 116 | } 117 | 118 | if cur_index != node_count - 1 { 119 | return Ok(false); 120 | } 121 | 122 | a.multi_node(&nodes, 0) 123 | }; 124 | 125 | Ok(h == self.root()) 126 | } 127 | 128 | /// Verifies MT inclusion proof 129 | pub fn validate>(&self) -> Result { 130 | if self.top_layer_nodes > 0 { 131 | // Special Top layer handling here. 132 | ensure!( 133 | self.sub_tree_proof.is_some(), 134 | "Sub tree proof must be present for validation" 135 | ); 136 | 137 | return self.validate_sub_tree_proof::(self.top_layer_nodes); 138 | } 139 | 140 | if self.sub_tree_layer_nodes > 0 { 141 | // Sub-tree layer handling here. 142 | ensure!( 143 | self.sub_tree_proof.is_some(), 144 | "Sub tree proof must be present for validation" 145 | ); 146 | 147 | return self.validate_sub_tree_proof::(self.sub_tree_layer_nodes); 148 | } 149 | 150 | // Base layer handling here. 
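        // Editor's walkthrough of the loop below (illustrative): for a
        // binary tree with 4 leaves and a proof for leaf 0, we have
        // lemma = [item, sib0, sib1, root] and path = [0, 0], so:
        //   i = 1: nodes = [h, sib0] -> h = multi_node(&nodes, 0)
        //   i = 2: nodes = [h, sib1] -> h = multi_node(&nodes, 1)
        // and validation returns Ok(h == root).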
151 | ensure!( 152 | self.sub_tree_layer_nodes == 0, 153 | "Base layer proof must have 0 as sub-tree layer node count" 154 | ); 155 | ensure!( 156 | self.top_layer_nodes == 0, 157 | "Base layer proof must have 0 as top layer node count" 158 | ); 159 | ensure!(self.sub_tree_proof.is_none(), "Sub tree proof must be None"); 160 | 161 | let size = self.lemma.len(); 162 | if size < 2 { 163 | return Ok(false); 164 | } 165 | 166 | let branches = BaseTreeArity::to_usize(); 167 | let mut a = A::default(); 168 | let mut h = self.item(); 169 | let mut path_index = 1; 170 | 171 | for i in (1..size - 1).step_by(branches - 1) { 172 | a.reset(); 173 | h = { 174 | let mut nodes: Vec = Vec::with_capacity(branches); 175 | let mut cur_index = 0; 176 | for j in 0..branches { 177 | if j == self.path[path_index - 1] { 178 | nodes.push(h.clone()); 179 | } else { 180 | nodes.push(self.lemma[i + cur_index].clone()); 181 | cur_index += 1; 182 | } 183 | } 184 | 185 | if cur_index != branches - 1 { 186 | return Ok(false); 187 | } 188 | 189 | path_index += 1; 190 | a.multi_node(&nodes, i - 1) 191 | }; 192 | } 193 | 194 | Ok(h == self.root()) 195 | } 196 | 197 | /// Verifies MT inclusion proof and that leaf_data is the original leaf data for which proof was generated. 198 | pub fn validate_with_data>(&self, leaf_data: &dyn Hashable) -> Result { 199 | let mut a = A::default(); 200 | leaf_data.hash(&mut a); 201 | let item = a.hash(); 202 | a.reset(); 203 | let leaf_hash = a.leaf(item); 204 | 205 | if leaf_hash == self.item() { 206 | self.validate::() 207 | } else { 208 | Ok(false) 209 | } 210 | } 211 | 212 | /// Returns the path of this proof. 213 | pub fn path(&self) -> &Vec { 214 | &self.path 215 | } 216 | 217 | /// Returns the lemma of this proof. 218 | pub fn lemma(&self) -> &Vec { 219 | &self.lemma 220 | } 221 | 222 | /// Returns the lemma of this proof as mutable. 223 | pub fn lemma_mut(&mut self) -> &mut Vec { 224 | &mut self.lemma 225 | } 226 | 227 | pub fn top_layer_nodes(&self) -> usize { 228 | self.top_layer_nodes 229 | } 230 | 231 | pub fn sub_layer_nodes(&self) -> usize { 232 | self.sub_tree_layer_nodes 233 | } 234 | } 235 | 236 | #[cfg(test)] 237 | mod tests { 238 | use crate::hash::{Algorithm, Hashable}; 239 | use crate::merkle::Element; 240 | use crate::merkle::MerkleTree; 241 | use crate::proof::Proof; 242 | use crate::store::VecStore; 243 | use crate::test_legacy::{get_vec_tree_from_slice, Item, Sha256Hasher, XOR128}; 244 | use typenum::{Unsigned, U0, U1, U2, U3, U4, U5, U8}; 245 | 246 | // Break one element inside the proof's top layer (if available). 247 | // Otherwise, break the sub-proof. 
248 | fn modify_proof< 249 | E: Element, 250 | A: Algorithm, 251 | BaseTreeArity: Unsigned, 252 | SubTreeArity: Unsigned, 253 | TopTreeArity: Unsigned, 254 | >( 255 | proof: &mut Proof, 256 | ) { 257 | use rand::prelude::*; 258 | 259 | if TopTreeArity::to_usize() > 0 { 260 | assert!(proof.sub_tree_proof.is_some()); 261 | assert!(proof 262 | .sub_tree_proof 263 | .as_ref() 264 | .unwrap() // safe 265 | .sub_tree_proof 266 | .is_some()); 267 | } else if SubTreeArity::to_usize() > 0 { 268 | assert!(proof.sub_tree_proof.is_some()); 269 | } 270 | 271 | let mut hasher_alg = A::default(); 272 | let mut tmp = vec![0u8; E::byte_len()]; 273 | 274 | if TopTreeArity::to_usize() > 0 || SubTreeArity::to_usize() > 0 { 275 | // unwrap is safe as we checked sub_tree_proof to be initialised 276 | let i = random::() % proof.sub_tree_proof.as_ref().unwrap().lemma().len(); 277 | let j = random::(); 278 | 279 | j.hash(&mut hasher_alg); 280 | 281 | // Break random sub-tree proof element 282 | 283 | // unwrap is safe as we checked sub_tree_proof to be initialised 284 | proof.sub_tree_proof.as_ref().unwrap().lemma()[i].copy_to_slice(&mut tmp); 285 | tmp.hash(&mut hasher_alg); 286 | // unwrap is safe as we checked sub_tree_proof to be initialised 287 | proof.sub_tree_proof.as_mut().unwrap().lemma_mut()[i] = hasher_alg.hash(); 288 | } else { 289 | let i = random::() % proof.lemma.len(); 290 | let k = random::(); 291 | 292 | k.hash(&mut hasher_alg); 293 | 294 | // Break random element 295 | proof.lemma[i].copy_to_slice(&mut tmp); 296 | tmp.hash(&mut hasher_alg); 297 | proof.lemma[i] = hasher_alg.hash(); 298 | } 299 | } 300 | 301 | #[test] 302 | fn test_proofs() { 303 | fn run_test< 304 | E: Element, 305 | A: Algorithm, 306 | BaseTreeArity: Unsigned, 307 | SubTreeArity: Unsigned, 308 | TopTreeArity: Unsigned, 309 | >() { 310 | let leafs = 32768; 311 | let tree = get_vec_tree_from_slice::(leafs); 312 | 313 | for i in 0..tree.leafs() { 314 | let mut p = tree 315 | .gen_proof(i) 316 | .expect("[test_proofs] failed to generate Merkle proof"); 317 | assert!(p.validate::().expect("[test_proofs] failed to validate")); 318 | 319 | // Break the proof here and assert negative validation. 
320 | modify_proof::(&mut p); 321 | assert!(!p.validate::().expect("[test_proofs] failed to validate")); 322 | } 323 | } 324 | 325 | run_test::(); 326 | run_test::(); 327 | } 328 | 329 | #[test] 330 | fn test_compound_quad_broken_proofs() { 331 | fn run_test< 332 | E: Element, 333 | A: Algorithm, 334 | BaseTreeArity: Unsigned, 335 | SubTreeArity: Unsigned, 336 | TopTreeArity: Unsigned, 337 | >() { 338 | let leafs = 16384; 339 | let mt1 = get_vec_tree_from_slice::(leafs); 340 | let mt2 = get_vec_tree_from_slice::(leafs); 341 | let mt3 = get_vec_tree_from_slice::(leafs); 342 | 343 | let tree: MerkleTree, BaseTreeArity, SubTreeArity> = 344 | MerkleTree::from_trees(vec![mt1, mt2, mt3]) 345 | .expect("[test_compound_quad_broken_proofs] Failed to build compound tree"); 346 | 347 | for i in 0..tree.leafs() { 348 | let mut p = tree 349 | .gen_proof(i) 350 | .expect("[test_compound_quad_broken_proofs] failed to generate Merkle proof"); 351 | assert!(p 352 | .validate::() 353 | .expect("[test_compound_quad_broken_proofs] failed to validate")); 354 | 355 | modify_proof::(&mut p); 356 | assert!(!p 357 | .validate::() 358 | .expect("[test_compound_quad_broken_proofs] failed to validate")); 359 | } 360 | } 361 | run_test::(); 362 | run_test::(); 363 | } 364 | 365 | #[test] 366 | fn test_compound_single_octree_broken_proofs() { 367 | fn run_test< 368 | E: Element, 369 | A: Algorithm, 370 | BaseTreeArity: Unsigned, 371 | SubTreeArity: Unsigned, 372 | TopTreeArity: Unsigned, 373 | >() { 374 | let leafs = 32768; 375 | let mt1 = get_vec_tree_from_slice::(leafs); 376 | 377 | let tree: MerkleTree, BaseTreeArity, SubTreeArity> = 378 | MerkleTree::from_trees(vec![mt1]).expect( 379 | "[test_compound_single_octree_broken_proofs] Failed to build compound tree", 380 | ); 381 | 382 | for i in 0..tree.leafs() { 383 | let mut p = tree.gen_proof(i).expect( 384 | "[test_compound_single_octree_broken_proofs] failed to generate Merkle proof", 385 | ); 386 | assert!(p 387 | .validate::() 388 | .expect("[test_compound_single_octree_broken_proofs] failed to validate")); 389 | 390 | modify_proof::(&mut p); 391 | assert!(!p 392 | .validate::() 393 | .expect("[test_compound_single_octree_broken_proofs] failed to validate")); 394 | } 395 | } 396 | run_test::(); 397 | run_test::(); 398 | } 399 | 400 | #[test] 401 | #[ignore] 402 | fn test_compound_octree_broken_proofs() { 403 | fn run_test< 404 | E: Element, 405 | A: Algorithm, 406 | BaseTreeArity: Unsigned, 407 | SubTreeArity: Unsigned, 408 | TopTreeArity: Unsigned, 409 | >() { 410 | let leafs = 32768; 411 | let mt1 = get_vec_tree_from_slice::(leafs); 412 | let mt2 = get_vec_tree_from_slice::(leafs); 413 | let mt3 = get_vec_tree_from_slice::(leafs); 414 | let mt4 = get_vec_tree_from_slice::(leafs); 415 | 416 | let tree: MerkleTree, BaseTreeArity, SubTreeArity> = 417 | MerkleTree::from_trees(vec![mt1, mt2, mt3, mt4]) 418 | .expect("[test_compound_octree_broken_proofs] Failed to build compound tree"); 419 | 420 | for i in 0..tree.leafs() { 421 | let mut p = tree 422 | .gen_proof(i) 423 | .expect("[test_compound_octree_broken_proofs] failed to generate Merkle proof"); 424 | assert!(p 425 | .validate::() 426 | .expect("[test_compound_octree_broken_proofs] failed to validate")); 427 | 428 | modify_proof::(&mut p); 429 | assert!(!p 430 | .validate::() 431 | .expect("[test_compound_octree_broken_proofs] failed to validate")); 432 | } 433 | } 434 | run_test::(); 435 | run_test::(); 436 | } 437 | 438 | #[test] 439 | fn test_compound_compound_quad_broken_proofs() { 440 | fn run_test< 441 | 
E: Element, 442 | A: Algorithm, 443 | BaseTreeArity: Unsigned, 444 | SubTreeArity: Unsigned, 445 | TopTreeArity: Unsigned, 446 | >() { 447 | let leafs = 16384; 448 | 449 | let mt1 = get_vec_tree_from_slice::(leafs); 450 | let mt2 = get_vec_tree_from_slice::(leafs); 451 | let mt3 = get_vec_tree_from_slice::(leafs); 452 | let cmt1: MerkleTree, BaseTreeArity, SubTreeArity> = 453 | MerkleTree::from_trees(vec![mt1, mt2, mt3]) 454 | .expect("[test_compound_compound_quad_broken_proofs] failed to build compound merkle tree"); 455 | 456 | let mt4 = get_vec_tree_from_slice::(leafs); 457 | let mt5 = get_vec_tree_from_slice::(leafs); 458 | let mt6 = get_vec_tree_from_slice::(leafs); 459 | let cmt2: MerkleTree, BaseTreeArity, SubTreeArity> = 460 | MerkleTree::from_trees(vec![mt4, mt5, mt6]) 461 | .expect("[test_compound_compound_quad_broken_proofs] failed to build compound merkle tree"); 462 | 463 | let mt7 = get_vec_tree_from_slice::(leafs); 464 | let mt8 = get_vec_tree_from_slice::(leafs); 465 | let mt9 = get_vec_tree_from_slice::(leafs); 466 | let cmt3: MerkleTree, BaseTreeArity, SubTreeArity> = 467 | MerkleTree::from_trees(vec![mt7, mt8, mt9]) 468 | .expect("[test_compound_compound_quad_broken_proofs] failed to build compound merkle tree"); 469 | 470 | let tree: MerkleTree, BaseTreeArity, SubTreeArity, TopTreeArity> = 471 | MerkleTree::from_sub_trees(vec![cmt1, cmt2, cmt3]) 472 | .expect("[test_compound_compound_quad_broken_proofs] Failed to build compound-compound tree"); 473 | 474 | for i in 0..tree.leafs() { 475 | let mut p = tree.gen_proof(i).expect("failed to generate Merkle proof"); 476 | assert!(p 477 | .validate::() 478 | .expect("[test_compound_compound_quad_broken_proofs] failed to validate")); 479 | 480 | modify_proof::(&mut p); 481 | assert!(!p 482 | .validate::() 483 | .expect("[test_compound_compound_quad_broken_proofs] failed to validate")); 484 | } 485 | } 486 | 487 | run_test::(); 488 | run_test::(); 489 | } 490 | 491 | #[test] 492 | #[ignore] 493 | fn test_compound_compound_single_quad_broken_proofs() { 494 | fn run_test< 495 | E: Element, 496 | A: Algorithm, 497 | BaseTreeArity: Unsigned, 498 | SubTreeArity: Unsigned, 499 | TopTreeArity: Unsigned, 500 | >() { 501 | let leafs = 16384; 502 | 503 | let mt1 = get_vec_tree_from_slice::(leafs); 504 | let mt2 = get_vec_tree_from_slice::(leafs); 505 | let mt3 = get_vec_tree_from_slice::(leafs); 506 | let cmt1: MerkleTree, BaseTreeArity, SubTreeArity> = 507 | MerkleTree::from_trees(vec![mt1, mt2, mt3]) 508 | .expect("[test_compound_compound_single_quad_broken_proofs] failed to build compound merkle tree"); 509 | 510 | let tree: MerkleTree, BaseTreeArity, SubTreeArity, TopTreeArity> = 511 | MerkleTree::from_sub_trees(vec![cmt1]) 512 | .expect("[test_compound_compound_single_quad_broken_proofs] Failed to build compound-compound tree"); 513 | 514 | for i in 0..tree.leafs() { 515 | let mut p = tree.gen_proof(i).expect("[test_compound_compound_single_quad_broken_proofs] failed to generate Merkle proof"); 516 | assert!(p.validate::().expect( 517 | "[test_compound_compound_single_quad_broken_proofs] failed to validate" 518 | )); 519 | 520 | // TODO investigate why SubTree and TopTree are substituted (in origin test) 521 | modify_proof::(&mut p); 522 | assert!(!p.validate::().expect( 523 | "[test_compound_compound_single_quad_broken_proofs] failed to validate" 524 | )); 525 | } 526 | } 527 | run_test::(); 528 | run_test::(); 529 | } 530 | 531 | #[test] 532 | #[ignore] 533 | fn test_compound_compound_octree_broken_proofs() { 534 | fn run_test< 535 
| E: Element, 536 | A: Algorithm, 537 | BaseTreeArity: Unsigned, 538 | SubTreeArity: Unsigned, 539 | TopTreeArity: Unsigned, 540 | >() { 541 | let leafs = 32768; 542 | 543 | let mt1 = get_vec_tree_from_slice::(leafs); 544 | let mt2 = get_vec_tree_from_slice::(leafs); 545 | let mt3 = get_vec_tree_from_slice::(leafs); 546 | let mt4 = get_vec_tree_from_slice::(leafs); 547 | let cmt1: MerkleTree, BaseTreeArity, SubTreeArity> = 548 | MerkleTree::from_trees(vec![mt1, mt2, mt3, mt4]).expect( 549 | "[test_compound_compound_octree_broken_proofs] Failed to build compound tree", 550 | ); 551 | 552 | let mt5 = get_vec_tree_from_slice::(leafs); 553 | let mt6 = get_vec_tree_from_slice::(leafs); 554 | let mt7 = get_vec_tree_from_slice::(leafs); 555 | let mt8 = get_vec_tree_from_slice::(leafs); 556 | let cmt2: MerkleTree, BaseTreeArity, SubTreeArity> = 557 | MerkleTree::from_trees(vec![mt5, mt6, mt7, mt8]).expect( 558 | "[test_compound_compound_octree_broken_proofs] Failed to build compound tree", 559 | ); 560 | 561 | let mt9 = get_vec_tree_from_slice::(leafs); 562 | let mt10 = get_vec_tree_from_slice::(leafs); 563 | let mt11 = get_vec_tree_from_slice::(leafs); 564 | let mt12 = get_vec_tree_from_slice::(leafs); 565 | let cmt3: MerkleTree, BaseTreeArity, SubTreeArity> = 566 | MerkleTree::from_trees(vec![mt9, mt10, mt11, mt12]).expect( 567 | "[test_compound_compound_octree_broken_proofs] Failed to build compound tree", 568 | ); 569 | 570 | let mt13 = get_vec_tree_from_slice::(leafs); 571 | let mt14 = get_vec_tree_from_slice::(leafs); 572 | let mt15 = get_vec_tree_from_slice::(leafs); 573 | let mt16 = get_vec_tree_from_slice::(leafs); 574 | let cmt4: MerkleTree, BaseTreeArity, SubTreeArity> = 575 | MerkleTree::from_trees(vec![mt13, mt14, mt15, mt16]).expect( 576 | "[test_compound_compound_octree_broken_proofs] Failed to build compound tree", 577 | ); 578 | 579 | let mt17 = get_vec_tree_from_slice::(leafs); 580 | let mt18 = get_vec_tree_from_slice::(leafs); 581 | let mt19 = get_vec_tree_from_slice::(leafs); 582 | let mt20 = get_vec_tree_from_slice::(leafs); 583 | let cmt5: MerkleTree, BaseTreeArity, SubTreeArity> = 584 | MerkleTree::from_trees(vec![mt17, mt18, mt19, mt20]).expect( 585 | "[test_compound_compound_octree_broken_proofs] Failed to build compound tree", 586 | ); 587 | 588 | let tree: MerkleTree, BaseTreeArity, SubTreeArity, TopTreeArity> = 589 | MerkleTree::from_sub_trees(vec![cmt1, cmt2, cmt3, cmt4, cmt5]) 590 | .expect("[test_compound_compound_octree_broken_proofs] Failed to build compound-compound tree"); 591 | 592 | for i in 0..tree.leafs() { 593 | let mut p = tree.gen_proof(i).expect( 594 | "[test_compound_compound_octree_broken_proofs] failed to generate Merkle proof", 595 | ); 596 | assert!(p 597 | .validate::() 598 | .expect("[test_compound_compound_octree_broken_proofs] failed to validate")); 599 | 600 | // TODO investigate why SubTree and TopTree are substituted (in origin test) 601 | modify_proof::(&mut p); 602 | assert!(!p 603 | .validate::() 604 | .expect("[test_compound_compound_octree_broken_proofs] failed to validate")); 605 | } 606 | } 607 | run_test::(); 608 | run_test::(); 609 | } 610 | 611 | #[test] 612 | #[ignore] 613 | fn test_compound_compound_single_octree_broken_proofs() { 614 | fn run_test< 615 | E: Element, 616 | A: Algorithm, 617 | BaseTreeArity: Unsigned, 618 | SubTreeArity: Unsigned, 619 | TopTreeArity: Unsigned, 620 | >() { 621 | let leafs = 32768; 622 | 623 | let mt1 = get_vec_tree_from_slice::(leafs); 624 | let mt2 = get_vec_tree_from_slice::(leafs); 625 | let mt3 = 
get_vec_tree_from_slice::<E, A, BaseTreeArity>(leafs);
626 |             let mt4 = get_vec_tree_from_slice::<E, A, BaseTreeArity>(leafs);
627 |             let cmt1: MerkleTree<E, A, VecStore<E>, BaseTreeArity, SubTreeArity> =
628 |                 MerkleTree::from_trees(vec![mt1, mt2, mt3, mt4])
629 |                     .expect("[test_compound_compound_single_octree_broken_proofs] Failed to build compound tree");
630 | 
631 |             let tree: MerkleTree<E, A, VecStore<E>, BaseTreeArity, SubTreeArity, TopTreeArity> =
632 |                 MerkleTree::from_sub_trees(vec![cmt1]).expect("[test_compound_compound_single_octree_broken_proofs] Failed to build compound-compound tree");
633 | 
634 |             for i in 0..tree.leafs() {
635 |                 let mut p = tree.gen_proof(i).expect("[test_compound_compound_single_octree_broken_proofs] failed to generate Merkle proof");
636 |                 assert!(p.validate::<A>().expect(
637 |                     "[test_compound_compound_single_octree_broken_proofs] failed to validate"
638 |                 ));
639 | 
640 |                 // TODO investigate why SubTree and TopTree are substituted (in the original test)
641 |                 modify_proof::<E, A, BaseTreeArity, SubTreeArity, TopTreeArity>(&mut p);
642 |                 assert!(!p.validate::<A>().expect(
643 |                     "[test_compound_compound_single_octree_broken_proofs] failed to validate"
644 |                 ));
645 |             }
646 |         }
647 | 
648 |         run_test::();
649 |         run_test::();
650 |     }
651 | }
652 | 
--------------------------------------------------------------------------------
/src/store/level_cache.rs:
--------------------------------------------------------------------------------
1 | use std::fmt;
2 | use std::fs::{remove_file, File, OpenOptions};
3 | use std::io::{copy, Read, Seek, SeekFrom};
4 | use std::iter::FromIterator;
5 | use std::marker::PhantomData;
6 | use std::ops;
7 | use std::path::Path;
8 | use std::sync::{Arc, RwLock};
9 | 
10 | use anyhow::{Context, Result};
11 | use log::warn;
12 | use memmap2::MmapOptions;
13 | use positioned_io::{ReadAt, WriteAt};
14 | use rayon::iter::*;
15 | use rayon::prelude::*;
16 | use tempfile::tempfile;
17 | use typenum::marker_traits::Unsigned;
18 | 
19 | use crate::hash::Algorithm;
20 | use crate::merkle::{
21 |     get_merkle_tree_cache_size, get_merkle_tree_leafs, get_merkle_tree_len, log2_pow2, next_pow2,
22 |     Element,
23 | };
24 | use crate::store::{ExternalReader, Store, StoreConfig, BUILD_CHUNK_NODES};
25 | 
26 | /// The LevelCacheStore is used to reduce the on-disk footprint even
27 | /// further to the minimum at the cost of build time performance.
28 | /// Each LevelCacheStore is created with a StoreConfig object which
29 | /// contains the number of binary tree levels above the base that are
30 | /// 'cached'. This implementation has hard requirements about the on-disk
31 | /// file size based on that number of levels, so on-disk files
32 | /// are tied, structurally, to the configuration they were built with
33 | /// and can only be accessed with the same number of levels.
34 | pub struct LevelCacheStore<E: Element, R: Read + Send + Sync> {
35 |     len: usize,
36 |     elem_len: usize,
37 |     file: File,
38 | 
39 |     // The number of base layer data items.
40 |     data_width: usize,
41 | 
42 |     // The byte index of where the cached data begins.
43 |     cache_index_start: usize,
44 | 
45 |     // This flag is useful only immediately after instantiation, which
46 |     // is false if the store was newly initialized and true if the
47 |     // store was loaded from already existing on-disk data.
48 |     loaded_from_disk: bool,
49 | 
50 |     // We cache the on-disk file size to avoid accessing disk
51 |     // unnecessarily.
52 |     store_size: usize,
53 | 
54 |     // If provided, the store will use this method to access base
55 |     // layer data.
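    // Editor's sketch of wiring an external reader in (hedged: the replica
    // path and element type `E` are hypothetical; both constructors used
    // here are defined in this file and in store/mod.rs):
    //
    //     let reader = ExternalReader::new_from_path(Path::new("/path/to/replica"))?;
    //     let store: LevelCacheStore<E, std::fs::File> =
    //         LevelCacheStore::new_from_disk_with_reader(tree_len, branches, &config, reader)?;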
56 |     reader: Option<ExternalReader<R>>,
57 | 
58 |     _e: PhantomData<E>,
59 | }
60 | 
61 | impl<E: Element, R: Read + Send + Sync> fmt::Debug for LevelCacheStore<E, R> {
62 |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
63 |         f.debug_struct("LevelCacheStore")
64 |             .field("len", &self.len)
65 |             .field("elem_len", &self.elem_len)
66 |             .field("data_width", &self.data_width)
67 |             .field("loaded_from_disk", &self.loaded_from_disk)
68 |             .field("cache_index_start", &self.cache_index_start)
69 |             .field("store_size", &self.store_size)
70 |             .finish()
71 |     }
72 | }
73 | 
74 | impl<E: Element, R: Read + Send + Sync> LevelCacheStore<E, R> {
75 |     /// Used for opening v2 compacted DiskStores.
76 |     pub fn new_from_disk_with_reader(
77 |         store_range: usize,
78 |         branches: usize,
79 |         config: &StoreConfig,
80 |         reader: ExternalReader<R>,
81 |     ) -> Result<Self> {
82 |         let data_path = StoreConfig::data_path(&config.path, &config.id);
83 | 
84 |         ensure!(Path::new(&data_path).exists(), "[LevelCacheStore - new_from_disk_with_reader] new_from_disk_with_reader constructor can be used only for instantiating already existing storages");
85 | 
86 |         let file = match OpenOptions::new().write(true).read(true).open(&data_path) {
87 |             Ok(file) => file,
88 |             Err(e) => {
89 |                 if e.kind() == std::io::ErrorKind::PermissionDenied {
90 |                     warn!("[LevelCacheStore - new_from_disk_with_reader] Permission denied. Trying to open the storage as read-only");
91 |                 }
92 |                 OpenOptions::new()
93 |                     .write(false)
94 |                     .read(true)
95 |                     .open(&data_path)?
96 |             }
97 |         };
98 | 
99 |         let metadata = file.metadata()?;
100 |         let store_size = metadata.len() as usize;
101 | 
102 |         // The LevelCacheStore base data layer must already be a
103 |         // massaged next pow2 (guaranteed if created with
104 |         // DiskStore::compact, which is the only supported method at
105 |         // the moment).
106 |         let size = get_merkle_tree_leafs(store_range, branches)?;
107 |         ensure!(
108 |             size == next_pow2(size),
109 |             "Inconsistent merkle tree row_count detected"
110 |         );
111 | 
112 |         // Values below in bytes.
113 |         // Convert store_range from an element count to bytes.
114 |         let store_range = store_range * E::byte_len();
115 | 
116 |         // The LevelCacheStore on-disk file is only the cached data, so
117 |         // the file size dictates the cache_size. Calculate the cache
118 |         // start and the updated size with respect to the file size.
119 |         let cache_size =
120 |             get_merkle_tree_cache_size(size, branches, config.rows_to_discard)? * E::byte_len();
121 |         let cache_index_start = store_range - cache_size;
122 | 
123 |         // Sanity check that the StoreConfig rows_to_discard matches this
124 |         // particular on-disk file. Since an external reader *is*
125 |         // set, we check to make sure that the data on disk is *only*
126 |         // the cached element data.
127 |         ensure!(
128 |             store_size == cache_size,
129 |             "Inconsistent store size detected with external reader ({} != {})",
130 |             store_size,
131 |             cache_size,
132 |         );
133 | 
134 |         Ok(LevelCacheStore {
135 |             len: store_range / E::byte_len(),
136 |             elem_len: E::byte_len(),
137 |             file,
138 |             data_width: size,
139 |             cache_index_start,
140 |             store_size,
141 |             loaded_from_disk: false,
142 |             reader: Some(reader),
143 |             _e: Default::default(),
144 |         })
145 |     }
146 | 
147 |     pub fn set_external_reader(&mut self, reader: ExternalReader<R>) -> Result<()> {
148 |         self.reader = Some(reader);
149 | 
150 |         Ok(())
151 |     }
152 | }
153 | 
154 | impl<E: Element, R: Read + Send + Sync> Store<E> for LevelCacheStore<E, R> {
155 |     fn new_with_config(size: usize, branches: usize, config: StoreConfig) -> Result<Self> {
156 |         let data_path = StoreConfig::data_path(&config.path, &config.id);
157 | 
158 |         // If the specified file exists, load it from disk. This is
159 |         // the only supported usage of this call for this type of
160 |         // Store.
161 |         if Path::new(&data_path).exists() {
162 |             return Self::new_from_disk(size, branches, &config);
163 |         }
164 | 
165 |         // Otherwise, create the file and allow it to be the on-disk store.
166 |         let file = OpenOptions::new()
167 |             .write(true)
168 |             .read(true)
169 |             .create_new(true)
170 |             .open(data_path)?;
171 | 
172 |         let store_size = E::byte_len() * size;
173 |         let leafs = get_merkle_tree_leafs(size, branches)?;
174 | 
175 |         ensure!(
176 |             leafs == next_pow2(leafs),
177 |             "Inconsistent merkle tree row_count detected"
178 |         );
179 | 
180 |         // Calculate the cache start and the updated size with respect to
181 |         // the data size.
182 |         let cache_size =
183 |             get_merkle_tree_cache_size(leafs, branches, config.rows_to_discard)? * E::byte_len();
184 |         let cache_index_start = store_size - cache_size;
185 | 
186 |         file.set_len(store_size as u64)?;
187 | 
188 |         Ok(LevelCacheStore {
189 |             len: 0,
190 |             elem_len: E::byte_len(),
191 |             file,
192 |             data_width: leafs,
193 |             cache_index_start,
194 |             store_size,
195 |             loaded_from_disk: false,
196 |             reader: None,
197 |             _e: Default::default(),
198 |         })
199 |     }
200 | 
201 |     fn new(size: usize) -> Result<Self> {
202 |         let store_size = E::byte_len() * size;
203 |         let file = tempfile()?;
204 |         file.set_len(store_size as u64)?;
205 | 
206 |         Ok(LevelCacheStore {
207 |             len: 0,
208 |             elem_len: E::byte_len(),
209 |             file,
210 |             data_width: size,
211 |             cache_index_start: 0,
212 |             store_size,
213 |             loaded_from_disk: false,
214 |             reader: None,
215 |             _e: Default::default(),
216 |         })
217 |     }
218 | 
219 |     fn new_from_slice_with_config(
220 |         size: usize,
221 |         branches: usize,
222 |         data: &[u8],
223 |         config: StoreConfig,
224 |     ) -> Result<Self> {
225 |         ensure!(
226 |             data.len() % E::byte_len() == 0,
227 |             "data size must be a multiple of {}",
228 |             E::byte_len()
229 |         );
230 | 
231 |         let mut store = Self::new_with_config(size, branches, config)?;
232 | 
233 |         // If the store was loaded from disk (based on the config
234 |         // information), avoid re-populating the store at this point,
235 |         // since the config implies that the data on disk is
236 |         // already correct.
237 |         if !store.loaded_from_disk {
238 |             store.store_copy_from_slice(0, data)?;
239 |             store.len = data.len() / store.elem_len;
240 |         }
241 | 
242 |         Ok(store)
243 |     }
244 | 
245 |     fn new_from_slice(size: usize, data: &[u8]) -> Result<Self> {
246 |         ensure!(
247 |             data.len() % E::byte_len() == 0,
248 |             "data size must be a multiple of {}",
249 |             E::byte_len()
250 |         );
251 | 
252 |         let mut store = Self::new(size)?;
253 |         store.store_copy_from_slice(0, data)?;
254 |         store.len = data.len() / store.elem_len;
255 | 
256 |         Ok(store)
257 |     }
258 | 
259 |     // Used for opening v1 compacted DiskStores.
260 |     fn new_from_disk(store_range: usize, branches: usize, config: &StoreConfig) -> Result<Self> {
261 |         let data_path = StoreConfig::data_path(&config.path, &config.id);
262 | 
263 |         ensure!(Path::new(&data_path).exists(), "[LevelCacheStore] new_from_disk constructor can be used only for instantiating already existing storages");
264 | 
265 |         let file = match OpenOptions::new().write(true).read(true).open(&data_path) {
266 |             Ok(file) => file,
267 |             Err(e) => {
268 |                 if e.kind() == std::io::ErrorKind::PermissionDenied {
269 |                     warn!("[LevelCacheStore - new_from_disk] Permission denied. Trying to open the storage as read-only");
270 |                 }
271 |                 OpenOptions::new()
272 |                     .write(false)
273 |                     .read(true)
274 |                     .open(&data_path)?
275 |             }
276 |         };
277 | 
278 |         let metadata = file.metadata()?;
279 |         let store_size = metadata.len() as usize;
280 | 
281 |         // The LevelCacheStore base data layer must already be a
282 |         // massaged next pow2 (guaranteed if created with
283 |         // DiskStore::compact, which is the only supported method at
284 |         // the moment).
285 |         let size = get_merkle_tree_leafs(store_range, branches)?;
286 |         ensure!(
287 |             size == next_pow2(size),
288 |             "Inconsistent merkle tree row_count detected"
289 |         );
290 | 
291 |         // Values below in bytes.
292 |         // Convert store_range from an element count to bytes.
293 |         let store_range = store_range * E::byte_len();
294 | 
295 |         // Calculate the cache start and the updated size with respect to
296 |         // the data size.
297 |         let cache_size =
298 |             get_merkle_tree_cache_size(size, branches, config.rows_to_discard)? * E::byte_len();
299 |         let cache_index_start = store_range - cache_size;
300 | 
301 |         // For a true v1 compatible store, this check should remain,
302 |         // but since the store structure is identical otherwise, this
303 |         // method can be re-used to open v2 stores, so long as an
304 |         // external_reader is set afterward.
305 | 
306 |         // Sanity check that the StoreConfig rows_to_discard matches this
307 |         // particular on-disk file.
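        // For reference, the two on-disk layouts this constructor may see:
        //
        //     v1 file: [ base layer | cached top rows ]  => size == data + cache
        //     v2 file: [ cached top rows ]               => size == cache only
        //
        // which is why the v1-only size check below is left commented out:
        // this constructor is also re-used to open v2 stores.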
308 | /* 309 | ensure!( 310 | store_size == size * E::byte_len() + cache_size, 311 | "Inconsistent store size detected" 312 | ); 313 | */ 314 | 315 | Ok(LevelCacheStore { 316 | len: store_range / E::byte_len(), 317 | elem_len: E::byte_len(), 318 | file, 319 | data_width: size, 320 | cache_index_start, 321 | loaded_from_disk: true, 322 | store_size, 323 | reader: None, 324 | _e: Default::default(), 325 | }) 326 | } 327 | 328 | fn write_at(&mut self, el: E, index: usize) -> Result<()> { 329 | self.store_copy_from_slice(index * self.elem_len, el.as_ref())?; 330 | self.len = std::cmp::max(self.len, index + 1); 331 | 332 | Ok(()) 333 | } 334 | 335 | fn copy_from_slice(&mut self, buf: &[u8], start: usize) -> Result<()> { 336 | ensure!( 337 | buf.len() % self.elem_len == 0, 338 | "buf size must be a multiple of {}", 339 | self.elem_len 340 | ); 341 | self.store_copy_from_slice(start * self.elem_len, buf)?; 342 | self.len = std::cmp::max(self.len, start + buf.len() / self.elem_len); 343 | 344 | Ok(()) 345 | } 346 | 347 | fn read_at(&self, index: usize) -> Result { 348 | let start = index * self.elem_len; 349 | let end = start + self.elem_len; 350 | 351 | let len = self.len * self.elem_len; 352 | ensure!(start < len, "start out of range {} >= {}", start, len); 353 | ensure!(end <= len, "end out of range {} > {}", end, len); 354 | ensure!( 355 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 356 | "out of bounds" 357 | ); 358 | 359 | Ok(E::from_slice(&self.store_read_range(start, end)?)) 360 | } 361 | 362 | fn read_into(&self, index: usize, buf: &mut [u8]) -> Result<()> { 363 | let start = index * self.elem_len; 364 | let end = start + self.elem_len; 365 | 366 | let len = self.len * self.elem_len; 367 | ensure!(start < len, "start out of range {} >= {}", start, len); 368 | ensure!(end <= len, "end out of range {} > {}", end, len); 369 | ensure!( 370 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 371 | "out of bounds" 372 | ); 373 | 374 | self.store_read_into(start, end, buf) 375 | } 376 | 377 | fn read_range_into(&self, start: usize, end: usize, buf: &mut [u8]) -> Result<()> { 378 | let start = start * self.elem_len; 379 | let end = end * self.elem_len; 380 | 381 | let len = self.len * self.elem_len; 382 | ensure!(start < len, "start out of range {} >= {}", start, len); 383 | ensure!(end <= len, "end out of range {} > {}", end, len); 384 | ensure!( 385 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 386 | "out of bounds" 387 | ); 388 | 389 | self.store_read_into(start, end, buf) 390 | } 391 | 392 | fn read_range(&self, r: ops::Range) -> Result> { 393 | let start = r.start * self.elem_len; 394 | let end = r.end * self.elem_len; 395 | 396 | let len = self.len * self.elem_len; 397 | ensure!(start < len, "start out of range {} >= {}", start, len); 398 | ensure!(end <= len, "end out of range {} > {}", end, len); 399 | ensure!( 400 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 401 | "out of bounds" 402 | ); 403 | 404 | Ok(self 405 | .store_read_range(start, end)? 
406 |             .chunks(self.elem_len)
407 |             .map(E::from_slice)
408 |             .collect())
409 |     }
410 | 
411 |     fn len(&self) -> usize {
412 |         self.len
413 |     }
414 | 
415 |     fn loaded_from_disk(&self) -> bool {
416 |         self.loaded_from_disk
417 |     }
418 | 
419 |     fn compact(
420 |         &mut self,
421 |         _branches: usize,
422 |         _config: StoreConfig,
423 |         _store_version: u32,
424 |     ) -> Result<bool> {
425 |         bail!("Cannot compact this type of Store");
426 |     }
427 | 
428 |     fn delete(config: StoreConfig) -> Result<()> {
429 |         let path = StoreConfig::data_path(&config.path, &config.id);
430 |         remove_file(&path).with_context(|| format!("Failed to delete {:?}", &path))
431 |     }
432 | 
433 |     fn is_empty(&self) -> bool {
434 |         self.len == 0
435 |     }
436 | 
437 |     fn push(&mut self, el: E) -> Result<()> {
438 |         let len = self.len;
439 |         ensure!(
440 |             (len + 1) * self.elem_len <= self.store_size(),
441 |             "not enough space, len: {}, E size {}, store len {}",
442 |             len,
443 |             self.elem_len,
444 |             self.store_size()
445 |         );
446 | 
447 |         self.write_at(el, len)
448 |     }
449 | 
450 |     fn sync(&self) -> Result<()> {
451 |         self.file.sync_all().context("failed to sync file")
452 |     }
453 | 
454 |     #[allow(unsafe_code)]
455 |     fn process_layer<A: Algorithm<E>, U: Unsigned>(
456 |         &mut self,
457 |         width: usize,
458 |         level: usize,
459 |         read_start: usize,
460 |         write_start: usize,
461 |     ) -> Result<()> {
462 |         // Safety: this operation is safe because it's a limited
463 |         // writable region on the backing store managed by this type.
464 |         let mut mmap = unsafe {
465 |             let mut mmap_options = MmapOptions::new();
466 |             mmap_options
467 |                 .offset((write_start * E::byte_len()) as u64)
468 |                 .len(width * E::byte_len())
469 |                 .map_mut(&self.file)
470 |         }?;
471 | 
472 |         let data_lock = Arc::new(RwLock::new(self));
473 |         let branches = U::to_usize();
474 |         let shift = log2_pow2(branches);
475 |         let write_chunk_width = (BUILD_CHUNK_NODES >> shift) * E::byte_len();
476 | 
477 |         ensure!(BUILD_CHUNK_NODES % branches == 0, "Invalid chunk size");
478 |         Vec::from_iter((read_start..read_start + width).step_by(BUILD_CHUNK_NODES))
479 |             .into_par_iter()
480 |             .zip(mmap.par_chunks_mut(write_chunk_width))
481 |             .try_for_each(|(chunk_index, write_mmap)| -> Result<()> {
482 |                 let chunk_size = std::cmp::min(BUILD_CHUNK_NODES, read_start + width - chunk_index);
483 | 
484 |                 let chunk_nodes = {
485 |                     // Read everything taking the lock once.
486 |                     data_lock
487 |                         .read()
488 |                         .expect("[process_layer] couldn't block current thread")
489 |                         .read_range_internal(chunk_index..chunk_index + chunk_size)?
490 |                 };
491 | 
492 |                 let nodes_size = (chunk_nodes.len() / branches) * E::byte_len();
493 |                 let hashed_nodes_as_bytes = chunk_nodes.chunks(branches).fold(
494 |                     Vec::with_capacity(nodes_size),
495 |                     |mut acc, nodes| {
496 |                         let h = A::default().multi_node(nodes, level);
497 |                         acc.extend_from_slice(h.as_ref());
498 |                         acc
499 |                     },
500 |                 );
501 | 
502 |                 // Check that we correctly pre-allocated the space.
503 |                 let hashed_nodes_as_bytes_len = hashed_nodes_as_bytes.len();
504 |                 ensure!(
505 |                     hashed_nodes_as_bytes.len() == chunk_size / branches * E::byte_len(),
506 |                     "Invalid hashed node length"
507 |                 );
508 | 
509 |                 write_mmap[0..hashed_nodes_as_bytes_len].copy_from_slice(&hashed_nodes_as_bytes);
510 | 
511 |                 Ok(())
512 |             })
513 |     }
514 | 
515 |     // LevelCacheStore specific merkle-tree build.
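    // Editor's note on the shape of this build (hedged numeric example,
    // assuming get_merkle_tree_cache_size keeps the top
    // row_count - 1 - rows_to_discard rows): for a binary tree with 8
    // leaves (15 elements total) and rows_to_discard = 1, the cached top
    // rows hold 2 + 1 = 3 elements, so cache_index_start = 15 - 3 = 12,
    // and the 8 + 4 elements below it are hashed and then truncated away
    // from the front of the file as the build advances.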
516 | fn build, U: Unsigned>( 517 | &mut self, 518 | leafs: usize, 519 | row_count: usize, 520 | config: Option, 521 | ) -> Result { 522 | let branches = U::to_usize(); 523 | ensure!( 524 | next_pow2(branches) == branches, 525 | "branches MUST be a power of 2" 526 | ); 527 | ensure!(Store::len(self) == leafs, "Inconsistent data"); 528 | ensure!(leafs % 2 == 0, "Leafs must be a power of two"); 529 | 530 | // Process one `level` at a time of `width` nodes. Each level has half the nodes 531 | // as the previous one; the first level, completely stored in `data`, has `leafs` 532 | // nodes. We guarantee an even number of nodes per `level`, duplicating the last 533 | // node if necessary. 534 | let mut level: usize = 0; 535 | let mut width = leafs; 536 | let mut level_node_index = 0; 537 | 538 | let config = config.context("LevelCacheStore build requires a valid config")?; 539 | let shift = log2_pow2(branches); 540 | 541 | // Both in terms of elements, not bytes. 542 | let cache_size = get_merkle_tree_cache_size(leafs, branches, config.rows_to_discard)?; 543 | let cache_index_start = (get_merkle_tree_len(leafs, branches)?) - cache_size; 544 | 545 | while width > 1 { 546 | // Start reading at the beginning of the current level, and writing the next 547 | // level immediate after. `level_node_index` keeps track of the current read 548 | // starts, and width is updated accordingly at each level so that we know where 549 | // to start writing. 550 | let (read_start, write_start) = if level == 0 { 551 | // Note that we previously asserted that data.len() == leafs. 552 | (0, Store::len(self)) 553 | } else if level_node_index < cache_index_start { 554 | (0, width) 555 | } else { 556 | ( 557 | level_node_index - cache_index_start, 558 | (level_node_index + width) - cache_index_start, 559 | ) 560 | }; 561 | 562 | self.process_layer::(width, level, read_start, write_start)?; 563 | 564 | if level_node_index < cache_index_start { 565 | self.front_truncate(&config, width)?; 566 | } 567 | 568 | level_node_index += width; 569 | level += 1; 570 | width >>= shift; // width /= branches; 571 | 572 | // When the layer is complete, update the store length 573 | // since we know the backing file was updated outside of 574 | // the store interface. 575 | self.set_len(level_node_index); 576 | } 577 | 578 | // Account for the root element. 579 | self.set_len(Store::len(self) + 1); 580 | // Ensure every element is accounted for. 581 | ensure!( 582 | Store::len(self) == get_merkle_tree_len(leafs, branches)?, 583 | "Invalid merkle tree length" 584 | ); 585 | 586 | ensure!(row_count == level + 1, "Invalid tree row_count"); 587 | // The root isn't part of the previous loop so `row_count` is 588 | // missing one level. 589 | 590 | // Return the root. Note that the offset is adjusted because 591 | // we've just built a store that says that it has the full 592 | // length of elements, when in fact only the cached portion is 593 | // on disk. 594 | self.read_at_internal(self.len() - cache_index_start - 1) 595 | } 596 | } 597 | 598 | impl LevelCacheStore { 599 | pub fn set_len(&mut self, len: usize) { 600 | self.len = len; 601 | } 602 | 603 | // Remove 'len' elements from the front of the file. 
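    // For example (in element terms): if the file currently holds
    // [ l0 l1 l2 l3 | n0 n1 ] and front_truncate(&config, 4) is called
    // once level 0 has been consumed, the file that remains is [ n0 n1 ]
    // and shrinks by 4 * E::byte_len() bytes.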
604 |     pub fn front_truncate(&mut self, config: &StoreConfig, len: usize) -> Result<()> {
605 |         let metadata = self.file.metadata()?;
606 |         let store_size = metadata.len();
607 |         let len = (len * E::byte_len()) as u64;
608 | 
609 |         ensure!(store_size >= len, "Invalid truncation length");
610 | 
611 |         // Seek the reader past the length we want removed.
612 |         let mut reader = OpenOptions::new()
613 |             .read(true)
614 |             .open(StoreConfig::data_path(&config.path, &config.id))?;
615 |         reader.seek(SeekFrom::Start(len))?;
616 | 
617 |         // Make sure the store file is opened for read/write.
618 |         self.file = OpenOptions::new()
619 |             .read(true)
620 |             .write(true)
621 |             .open(StoreConfig::data_path(&config.path, &config.id))?;
622 | 
623 |         // Seek the writer.
624 |         self.file.seek(SeekFrom::Start(0))?;
625 | 
626 |         let written = copy(&mut reader, &mut self.file)?;
627 |         ensure!(written == store_size - len, "Failed to copy all data");
628 | 
629 |         self.file.set_len(written)?;
630 | 
631 |         Ok(())
632 |     }
633 | 
634 |     pub fn store_size(&self) -> usize {
635 |         self.store_size
636 |     }
637 | 
638 |     // 'store_range' must be the total number of elements in the store (e.g. tree.len()).
639 |     pub fn is_consistent_v1(
640 |         store_range: usize,
641 |         branches: usize,
642 |         config: &StoreConfig,
643 |     ) -> Result<bool> {
644 |         let data_path = StoreConfig::data_path(&config.path, &config.id);
645 | 
646 |         let file = File::open(data_path)?;
647 |         let metadata = file.metadata()?;
648 |         let store_size = metadata.len() as usize;
649 | 
650 |         // The LevelCacheStore base data layer must already be a
651 |         // massaged next pow2 (guaranteed if created with
652 |         // DiskStore::compact, which is the only supported method at
653 |         // the moment).
654 |         let size = get_merkle_tree_leafs(store_range, branches)?;
655 |         ensure!(
656 |             size == next_pow2(size),
657 |             "Inconsistent merkle tree row_count detected"
658 |         );
659 | 
660 |         // Calculate the cache start and the updated size with respect to
661 |         // the data size.
662 |         let cache_size =
663 |             get_merkle_tree_cache_size(size, branches, config.rows_to_discard)? * E::byte_len();
664 | 
665 |         // Sanity check that the StoreConfig rows_to_discard matches this
666 |         // particular on-disk file.
667 |         Ok(store_size == size * E::byte_len() + cache_size)
668 |     }
669 | 
670 |     // Note that v2 is now the default compaction mode, so this isn't a versioned call.
671 |     // 'store_range' must be the total number of elements in the store (e.g. tree.len()).
672 |     pub fn is_consistent(
673 |         store_range: usize,
674 |         branches: usize,
675 |         config: &StoreConfig,
676 |     ) -> Result<bool> {
677 |         let data_path = StoreConfig::data_path(&config.path, &config.id);
678 | 
679 |         let file = File::open(data_path)?;
680 |         let metadata = file.metadata()?;
681 |         let store_size = metadata.len() as usize;
682 | 
683 |         // The LevelCacheStore base data layer must already be a
684 |         // massaged next pow2 (guaranteed if created with
685 |         // DiskStore::compact, which is the only supported method at
686 |         // the moment).
687 |         let size = get_merkle_tree_leafs(store_range, branches)?;
688 |         ensure!(
689 |             size == next_pow2(size),
690 |             "Inconsistent merkle tree row_count detected"
691 |         );
692 | 
693 |         // The LevelCacheStore on-disk file is only the cached data, so
694 |         // the file size dictates the cache_size. Calculate the cache
695 |         // start and the updated size with respect to the file size.
696 |         let cache_size =
697 |             get_merkle_tree_cache_size(size, branches, config.rows_to_discard)?
* E::byte_len(); 698 | 699 | // Sanity checks that the StoreConfig rows_to_discard matches this 700 | // particular on-disk file. Since an external reader *is* 701 | // set, we check to make sure that the data on disk is *only* 702 | // the cached element data. 703 | Ok(store_size == cache_size) 704 | } 705 | 706 | pub fn store_read_range(&self, start: usize, end: usize) -> Result> { 707 | let read_len = end - start; 708 | let mut read_data = vec![0; read_len]; 709 | let mut adjusted_start = start; 710 | 711 | ensure!( 712 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 713 | "out of bounds" 714 | ); 715 | 716 | // If an external reader was specified for the base layer, use it. 717 | if start < self.data_width * self.elem_len && self.reader.is_some() { 718 | self.reader 719 | .as_ref() 720 | .unwrap() // unwrap is safe as we checked reader to be initialised 721 | .read(start, end, &mut read_data) 722 | .with_context(|| { 723 | format!( 724 | "failed to read {} bytes from file at offset {}", 725 | end - start, 726 | start 727 | ) 728 | })?; 729 | 730 | return Ok(read_data); 731 | } 732 | 733 | // Adjust read index if in the cached ranged to be shifted 734 | // over since the data stored is compacted. 735 | if start >= self.cache_index_start { 736 | let v1 = self.reader.is_none(); 737 | adjusted_start = if v1 { 738 | start - self.cache_index_start + (self.data_width * self.elem_len) 739 | } else { 740 | start - self.cache_index_start 741 | }; 742 | } 743 | 744 | self.file 745 | .read_exact_at(adjusted_start as u64, &mut read_data) 746 | .with_context(|| { 747 | format!( 748 | "failed to read {} bytes from file at offset {}", 749 | read_len, start 750 | ) 751 | })?; 752 | 753 | Ok(read_data) 754 | } 755 | 756 | // This read is for internal use only during the 'build' process. 757 | fn store_read_range_internal(&self, start: usize, end: usize) -> Result> { 758 | let read_len = end - start; 759 | let mut read_data = vec![0; read_len]; 760 | 761 | ensure!( 762 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 763 | "out of bounds" 764 | ); 765 | 766 | self.file 767 | .read_exact_at(start as u64, &mut read_data) 768 | .with_context(|| { 769 | format!( 770 | "failed to read {} bytes from file at offset {}", 771 | read_len, start 772 | ) 773 | })?; 774 | 775 | Ok(read_data) 776 | } 777 | 778 | fn read_range_internal(&self, r: ops::Range) -> Result> { 779 | let start = r.start * self.elem_len; 780 | let end = r.end * self.elem_len; 781 | 782 | let len = self.len * self.elem_len; 783 | ensure!(start < len, "start out of range {} >= {}", start, len); 784 | ensure!(end <= len, "end out of range {} > {}", end, len); 785 | ensure!( 786 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 787 | "out of bounds" 788 | ); 789 | 790 | Ok(self 791 | .store_read_range_internal(start, end)? 
792 | .chunks(self.elem_len) 793 | .map(E::from_slice) 794 | .collect()) 795 | } 796 | 797 | fn read_at_internal(&self, index: usize) -> Result { 798 | let start = index * self.elem_len; 799 | let end = start + self.elem_len; 800 | 801 | let len = self.len * self.elem_len; 802 | ensure!(start < len, "start out of range {} >= {}", start, len); 803 | ensure!(end <= len, "end out of range {} > {}", end, len); 804 | ensure!( 805 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 806 | "out of bounds" 807 | ); 808 | 809 | Ok(E::from_slice(&self.store_read_range_internal(start, end)?)) 810 | } 811 | 812 | pub fn store_read_into(&self, start: usize, end: usize, buf: &mut [u8]) -> Result<()> { 813 | ensure!( 814 | start <= self.data_width * self.elem_len || start >= self.cache_index_start, 815 | "Invalid read start" 816 | ); 817 | 818 | // If an external reader was specified for the base layer, use it. 819 | if start < self.data_width * self.elem_len && self.reader.is_some() { 820 | self.reader 821 | .as_ref() 822 | .unwrap() // unwrap is safe as we checked reader to be initialised 823 | .read(start, end, buf) 824 | .with_context(|| { 825 | format!( 826 | "failed to read {} bytes from file at offset {}", 827 | end - start, 828 | start 829 | ) 830 | })?; 831 | } else { 832 | // Adjust read index if in the cached ranged to be shifted 833 | // over since the data stored is compacted. 834 | let adjusted_start = if start >= self.cache_index_start { 835 | if self.reader.is_none() { 836 | // if v1 837 | start - self.cache_index_start + (self.data_width * self.elem_len) 838 | } else { 839 | start - self.cache_index_start 840 | } 841 | } else { 842 | start 843 | }; 844 | 845 | self.file 846 | .read_exact_at(adjusted_start as u64, buf) 847 | .with_context(|| { 848 | format!( 849 | "failed to read {} bytes from file at offset {}", 850 | end - start, 851 | start 852 | ) 853 | })?; 854 | } 855 | 856 | Ok(()) 857 | } 858 | 859 | pub fn store_copy_from_slice(&mut self, start: usize, slice: &[u8]) -> Result<()> { 860 | ensure!( 861 | start + slice.len() <= self.store_size, 862 | "Requested slice too large (max: {})", 863 | self.store_size 864 | ); 865 | self.file.write_all_at(start as u64, slice)?; 866 | 867 | Ok(()) 868 | } 869 | } 870 | --------------------------------------------------------------------------------