├── .github └── workflows │ ├── release.yml │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── assets └── cnidarium.jpg ├── proto ├── penumbra │ └── penumbra │ │ └── cnidarium │ │ └── v1 │ │ └── cnidarium.proto └── rust-vendored │ ├── cosmos │ └── ics23 │ │ └── v1 │ │ └── proofs.proto │ ├── gogoproto │ └── gogo.proto │ └── ibc │ └── core │ └── commitment │ └── v1 │ └── commitment.proto ├── src ├── cache.rs ├── delta.rs ├── escaped_byte_slice.rs ├── future.rs ├── gen │ ├── penumbra.cnidarium.v1.rs │ ├── penumbra.cnidarium.v1.serde.rs │ ├── penumbra.cnidarium.v1alpha1.rs │ ├── penumbra.cnidarium.v1alpha1.serde.rs │ └── proto_descriptor.bin.no_lfs ├── lib.rs ├── metrics.rs ├── proto.rs ├── read.rs ├── rpc.rs ├── snapshot.rs ├── snapshot │ └── rocks_wrapper.rs ├── snapshot_cache.rs ├── storage.rs ├── storage │ └── temp.rs ├── store.rs ├── store │ ├── multistore.rs │ └── substore.rs ├── tests.rs ├── tests │ └── delta.rs ├── utils.rs ├── write.rs └── write_batch.rs ├── tests ├── migration.rs ├── substore_tests.rs └── write_batch.rs └── tools └── proto-compiler ├── Cargo.toml ├── README.md └── src └── main.rs /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | push: 5 | tags: 6 | - "v[0-9]+.[0-9]+.[0-9]+" # Push events to matching v*, i.e. v0.26.0, v1.0.0 7 | - "v[0-9]+.[0-9]+.[0-9]+-pre.[0-9]+" # e.g. 
v0.26.0-pre.1 8 | 9 | jobs: 10 | release: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - uses: actions-rs/toolchain@v1 15 | with: 16 | toolchain: stable 17 | override: true 18 | - name: Publish crate 19 | run: cargo publish --token ${{ secrets.CRATES_TOKEN }} 20 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | - name: Run migration tests 24 | run: cargo test --features migration 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "cnidarium" 3 | version = "0.84.0" 4 | authors = [ 5 | "Penumbra Labs ", 6 | "Henry de Valence ", 7 | "Erwan Or " 8 | ] 9 | edition = "2021" 10 | license = "MIT" 11 | description = "A fast and verifiable storage layer for blockchains" 12 | repository = "https://github.com/penumbra-zone/cnidarium" 13 | homepage = "https://github.com/penumbra-zone/cnidarium" 14 | documentation = "https://docs.rs/cnidarium" 15 | 16 | [features] 17 | migration = [] 18 | migration-proptests = ["migration"] 19 | default = ["metrics"] 20 | rpc = ["proto"] 21 | proto = ["dep:tonic", "dep:prost", "dep:serde", "dep:pbjson", "dep:ibc-proto"] 22 | 23 | [dependencies] 24 | anyhow = 
"1.0.86" 25 | async-trait = "0.1.80" 26 | base64 = "0.21.7" 27 | borsh = { version = "1.3.0" , features = ["derive", "de_strict_order"]} 28 | futures = "0.3.30" 29 | hex = "0.4.3" 30 | ibc-proto = { version = "0.52.0", default-features = false, features = ["serde"], optional = true } 31 | ibc-types = { version = "0.16.0", default-features = false, features = ["std"] } 32 | ics23 = "0.12.0" 33 | jmt = { version = "0.11", features = ["migration"] } 34 | metrics = { version = "0.24", optional = true } 35 | once_cell = "1.19.0" 36 | parking_lot = "0.12.3" 37 | pbjson = { version = "0.7", optional = true } 38 | pin-project = "1.1.5" 39 | prost = { version = "0.13.3", optional = true } 40 | regex = "1.10.5" 41 | rocksdb = "0.21" 42 | serde = { version = "1", optional = true} 43 | sha2 = "0.10" 44 | smallvec = { version = "1.10", features = ["union", "const_generics"] } 45 | tempfile = "3.10.1" 46 | tendermint = { version = "0.40.3", default-features = false } 47 | tokio = { version = "1.44", features = ["full", "tracing"] } 48 | tokio-stream = "0.1.17" 49 | tonic = { version = "0.13.0", optional = true } 50 | tracing = "0.1" 51 | 52 | [dev-dependencies] 53 | tempfile = "3.10.1" 54 | tracing-subscriber = "0.3.18" 55 | tokio = { version = "1.44", features = ["full", "rt-multi-thread"] } 56 | proptest = "1.3.1" 57 | test-strategy = "0.3.1" 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Penumbra 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to 
do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cnidarium: fast storage layer for app chains and L2s 2 | 3 | ![Cnidarium](assets/cnidarium.jpg) 4 | 5 | Documentation at https://docs.rs/cnidarium/latest/cnidarium/ 6 | 7 | -------------------------------------------------------------------------------- /assets/cnidarium.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/penumbra-zone/cnidarium/27e8b027452cf1775b10ac2cf8bda4f013de0d82/assets/cnidarium.jpg -------------------------------------------------------------------------------- /proto/penumbra/penumbra/cnidarium/v1/cnidarium.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package penumbra.cnidarium.v1; 4 | 5 | import "ibc/core/commitment/v1/commitment.proto"; 6 | 7 | service QueryService { 8 | // General-purpose key-value state query API, that can be used to query 9 | // arbitrary keys in the JMT storage. 
10 | rpc KeyValue(KeyValueRequest) returns (KeyValueResponse); 11 | 12 | // General-purpose key-value state query API, that can be used to query 13 | // arbitrary keys in the non-verifiable storage. 14 | rpc NonVerifiableKeyValue(NonVerifiableKeyValueRequest) returns (NonVerifiableKeyValueResponse); 15 | 16 | // General-purpose prefixed key-value state query API, that can be used to query 17 | // arbitrary prefixes in the JMT storage. 18 | rpc PrefixValue(PrefixValueRequest) returns (stream PrefixValueResponse); 19 | 20 | // Subscribes to a stream of key-value updates, with regex filtering on keys. 21 | rpc Watch(WatchRequest) returns (stream WatchResponse); 22 | } 23 | 24 | // Performs a key-value query against the nonverifiable storage, 25 | // using a byte-encoded key. 26 | message NonVerifiableKeyValueRequest { 27 | message Key { 28 | bytes inner = 1; 29 | } 30 | 31 | Key key = 1; 32 | } 33 | 34 | message NonVerifiableKeyValueResponse { 35 | message Value { 36 | bytes value = 1; 37 | } 38 | // The value corresponding to the specified key, if it was found. 39 | Value value = 1; 40 | } 41 | 42 | // Performs a key-value query against the JMT, either by key or by key hash. 43 | // 44 | // Proofs are only supported by key. 45 | message KeyValueRequest { 46 | // If set, the key to fetch from storage. 47 | string key = 2; 48 | // whether to return a proof 49 | bool proof = 3; 50 | } 51 | 52 | message KeyValueResponse { 53 | message Value { 54 | bytes value = 1; 55 | } 56 | // The value corresponding to the specified key, if it was found. 57 | Value value = 1; 58 | // A proof of existence or non-existence. 59 | .ibc.core.commitment.v1.MerkleProof proof = 2; 60 | } 61 | 62 | // Performs a prefixed key-value query, by string prefix. 63 | message PrefixValueRequest { 64 | // The prefix to fetch subkeys from storage. 
65 | string prefix = 2; 66 | } 67 | 68 | message PrefixValueResponse { 69 | string key = 1; 70 | bytes value = 2; 71 | } 72 | 73 | // Requests a stream of new key-value pairs that have been committed to the state. 74 | message WatchRequest { 75 | // A regex for keys in the verifiable storage. 76 | // 77 | // Only key-value updates whose keys match this regex will be returned. 78 | // Note that the empty string matches all keys. 79 | // To exclude all keys, use the regex "$^", which matches no strings. 80 | string key_regex = 1; 81 | // A regex for keys in the nonverifiable storage. 82 | // 83 | // Only key-value updates whose keys match this regex will be returned. 84 | // Note that the empty string matches all keys. 85 | // To exclude all keys, use the regex "$^", which matches no strings. 86 | string nv_key_regex = 2; 87 | } 88 | 89 | // A key-value pair that has been committed to the state. 90 | message WatchResponse { 91 | // Elements of the verifiable storage have string keys. 92 | message KeyValue { 93 | string key = 1; 94 | bytes value = 2; 95 | // If set to true, the key-value pair was deleted. 96 | // This allows distinguishing a deleted key-value pair from a key-value pair whose value is empty. 97 | bool deleted = 3; 98 | } 99 | // Elements of the nonverifiable storage have byte keys. 100 | message NvKeyValue { 101 | bytes key = 1; 102 | bytes value = 2; 103 | // If set to true, the key-value pair was deleted. 104 | // This allows distinguishing a deleted key-value pair from a key-value pair whose value is empty. 105 | bool deleted = 3; 106 | } 107 | 108 | // The state version the key-value pair was committed at. 109 | uint64 version = 1; 110 | 111 | // The entry that was committed. 
112 | oneof entry { 113 | KeyValue kv = 5; 114 | NvKeyValue nv_kv = 6; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /proto/rust-vendored/cosmos/ics23/v1/proofs.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package cosmos.ics23.v1; 4 | 5 | option go_package = "github.com/cosmos/ics23/go;ics23"; 6 | 7 | enum HashOp { 8 | // NO_HASH is the default if no data passed. Note this is an illegal argument some places. 9 | NO_HASH = 0; 10 | SHA256 = 1; 11 | SHA512 = 2; 12 | KECCAK = 3; 13 | RIPEMD160 = 4; 14 | BITCOIN = 5; // ripemd160(sha256(x)) 15 | SHA512_256 = 6; 16 | } 17 | 18 | /** 19 | LengthOp defines how to process the key and value of the LeafOp 20 | to include length information. After encoding the length with the given 21 | algorithm, the length will be prepended to the key and value bytes. 22 | (Each one with it's own encoded length) 23 | */ 24 | enum LengthOp { 25 | // NO_PREFIX don't include any length info 26 | NO_PREFIX = 0; 27 | // VAR_PROTO uses protobuf (and go-amino) varint encoding of the length 28 | VAR_PROTO = 1; 29 | // VAR_RLP uses rlp int encoding of the length 30 | VAR_RLP = 2; 31 | // FIXED32_BIG uses big-endian encoding of the length as a 32 bit integer 32 | FIXED32_BIG = 3; 33 | // FIXED32_LITTLE uses little-endian encoding of the length as a 32 bit integer 34 | FIXED32_LITTLE = 4; 35 | // FIXED64_BIG uses big-endian encoding of the length as a 64 bit integer 36 | FIXED64_BIG = 5; 37 | // FIXED64_LITTLE uses little-endian encoding of the length as a 64 bit integer 38 | FIXED64_LITTLE = 6; 39 | // REQUIRE_32_BYTES is like NONE, but will fail if the input is not exactly 32 bytes (sha256 output) 40 | REQUIRE_32_BYTES = 7; 41 | // REQUIRE_64_BYTES is like NONE, but will fail if the input is not exactly 64 bytes (sha512 output) 42 | REQUIRE_64_BYTES = 8; 43 | } 44 | 45 | /** 46 | ExistenceProof takes a key and a value and 
a set of steps to perform on it. 47 | The result of peforming all these steps will provide a "root hash", which can 48 | be compared to the value in a header. 49 | 50 | Since it is computationally infeasible to produce a hash collission for any of the used 51 | cryptographic hash functions, if someone can provide a series of operations to transform 52 | a given key and value into a root hash that matches some trusted root, these key and values 53 | must be in the referenced merkle tree. 54 | 55 | The only possible issue is maliablity in LeafOp, such as providing extra prefix data, 56 | which should be controlled by a spec. Eg. with lengthOp as NONE, 57 | prefix = FOO, key = BAR, value = CHOICE 58 | and 59 | prefix = F, key = OOBAR, value = CHOICE 60 | would produce the same value. 61 | 62 | With LengthOp this is tricker but not impossible. Which is why the "leafPrefixEqual" field 63 | in the ProofSpec is valuable to prevent this mutability. And why all trees should 64 | length-prefix the data before hashing it. 65 | */ 66 | message ExistenceProof { 67 | bytes key = 1; 68 | bytes value = 2; 69 | LeafOp leaf = 3; 70 | repeated InnerOp path = 4; 71 | } 72 | 73 | /* 74 | NonExistenceProof takes a proof of two neighbors, one left of the desired key, 75 | one right of the desired key. If both proofs are valid AND they are neighbors, 76 | then there is no valid proof for the given key. 77 | */ 78 | message NonExistenceProof { 79 | bytes key = 1; // TODO: remove this as unnecessary??? 
we prove a range 80 | ExistenceProof left = 2; 81 | ExistenceProof right = 3; 82 | } 83 | 84 | /* 85 | CommitmentProof is either an ExistenceProof or a NonExistenceProof, or a Batch of such messages 86 | */ 87 | message CommitmentProof { 88 | oneof proof { 89 | ExistenceProof exist = 1; 90 | NonExistenceProof nonexist = 2; 91 | BatchProof batch = 3; 92 | CompressedBatchProof compressed = 4; 93 | } 94 | } 95 | 96 | /** 97 | LeafOp represents the raw key-value data we wish to prove, and 98 | must be flexible to represent the internal transformation from 99 | the original key-value pairs into the basis hash, for many existing 100 | merkle trees. 101 | 102 | key and value are passed in. So that the signature of this operation is: 103 | leafOp(key, value) -> output 104 | 105 | To process this, first prehash the keys and values if needed (ANY means no hash in this case): 106 | hkey = prehashKey(key) 107 | hvalue = prehashValue(value) 108 | 109 | Then combine the bytes, and hash it 110 | output = hash(prefix || length(hkey) || hkey || length(hvalue) || hvalue) 111 | */ 112 | message LeafOp { 113 | HashOp hash = 1; 114 | HashOp prehash_key = 2; 115 | HashOp prehash_value = 3; 116 | LengthOp length = 4; 117 | // prefix is a fixed bytes that may optionally be included at the beginning to differentiate 118 | // a leaf node from an inner node. 119 | bytes prefix = 5; 120 | } 121 | 122 | /** 123 | InnerOp represents a merkle-proof step that is not a leaf. 124 | It represents concatenating two children and hashing them to provide the next result. 125 | 126 | The result of the previous step is passed in, so the signature of this op is: 127 | innerOp(child) -> output 128 | 129 | The result of applying InnerOp should be: 130 | output = op.hash(op.prefix || child || op.suffix) 131 | 132 | where the || operator is concatenation of binary data, 133 | and child is the result of hashing all the tree below this step. 
134 | 135 | Any special data, like prepending child with the length, or prepending the entire operation with 136 | some value to differentiate from leaf nodes, should be included in prefix and suffix. 137 | If either of prefix or suffix is empty, we just treat it as an empty string 138 | */ 139 | message InnerOp { 140 | HashOp hash = 1; 141 | bytes prefix = 2; 142 | bytes suffix = 3; 143 | } 144 | 145 | /** 146 | ProofSpec defines what the expected parameters are for a given proof type. 147 | This can be stored in the client and used to validate any incoming proofs. 148 | 149 | verify(ProofSpec, Proof) -> Proof | Error 150 | 151 | As demonstrated in tests, if we don't fix the algorithm used to calculate the 152 | LeafHash for a given tree, there are many possible key-value pairs that can 153 | generate a given hash (by interpretting the preimage differently). 154 | We need this for proper security, requires client knows a priori what 155 | tree format server uses. But not in code, rather a configuration object. 156 | */ 157 | message ProofSpec { 158 | // any field in the ExistenceProof must be the same as in this spec. 159 | // except Prefix, which is just the first bytes of prefix (spec can be longer) 160 | LeafOp leaf_spec = 1; 161 | InnerSpec inner_spec = 2; 162 | // max_depth (if > 0) is the maximum number of InnerOps allowed (mainly for fixed-depth tries) 163 | int32 max_depth = 3; 164 | // min_depth (if > 0) is the minimum number of InnerOps allowed (mainly for fixed-depth tries) 165 | int32 min_depth = 4; 166 | } 167 | 168 | /* 169 | InnerSpec contains all store-specific structure info to determine if two proofs from a 170 | given store are neighbors. 
171 | 172 | This enables: 173 | 174 | isLeftMost(spec: InnerSpec, op: InnerOp) 175 | isRightMost(spec: InnerSpec, op: InnerOp) 176 | isLeftNeighbor(spec: InnerSpec, left: InnerOp, right: InnerOp) 177 | */ 178 | message InnerSpec { 179 | // Child order is the ordering of the children node, must count from 0 180 | // iavl tree is [0, 1] (left then right) 181 | // merk is [0, 2, 1] (left, right, here) 182 | repeated int32 child_order = 1; 183 | int32 child_size = 2; 184 | int32 min_prefix_length = 3; 185 | int32 max_prefix_length = 4; 186 | // empty child is the prehash image that is used when one child is nil (eg. 20 bytes of 0) 187 | bytes empty_child = 5; 188 | // hash is the algorithm that must be used for each InnerOp 189 | HashOp hash = 6; 190 | } 191 | 192 | /* 193 | BatchProof is a group of multiple proof types than can be compressed 194 | */ 195 | message BatchProof { 196 | repeated BatchEntry entries = 1; 197 | } 198 | 199 | // Use BatchEntry not CommitmentProof, to avoid recursion 200 | message BatchEntry { 201 | oneof proof { 202 | ExistenceProof exist = 1; 203 | NonExistenceProof nonexist = 2; 204 | } 205 | } 206 | 207 | /****** all items here are compressed forms *******/ 208 | 209 | message CompressedBatchProof { 210 | repeated CompressedBatchEntry entries = 1; 211 | repeated InnerOp lookup_inners = 2; 212 | } 213 | 214 | // Use BatchEntry not CommitmentProof, to avoid recursion 215 | message CompressedBatchEntry { 216 | oneof proof { 217 | CompressedExistenceProof exist = 1; 218 | CompressedNonExistenceProof nonexist = 2; 219 | } 220 | } 221 | 222 | message CompressedExistenceProof { 223 | bytes key = 1; 224 | bytes value = 2; 225 | LeafOp leaf = 3; 226 | // these are indexes into the lookup_inners table in CompressedBatchProof 227 | repeated int32 path = 4; 228 | } 229 | 230 | message CompressedNonExistenceProof { 231 | bytes key = 1; // TODO: remove this as unnecessary??? 
we prove a range 232 | CompressedExistenceProof left = 2; 233 | CompressedExistenceProof right = 3; 234 | } 235 | -------------------------------------------------------------------------------- /proto/rust-vendored/gogoproto/gogo.proto: -------------------------------------------------------------------------------- 1 | // Protocol Buffers for Go with Gadgets 2 | // 3 | // Copyright (c) 2013, The GoGo Authors. All rights reserved. 4 | // http://github.com/cosmos/gogoproto 5 | // 6 | // Redistribution and use in source and binary forms, with or without 7 | // modification, are permitted provided that the following conditions are 8 | // met: 9 | // 10 | // * Redistributions of source code must retain the above copyright 11 | // notice, this list of conditions and the following disclaimer. 12 | // * Redistributions in binary form must reproduce the above 13 | // copyright notice, this list of conditions and the following disclaimer 14 | // in the documentation and/or other materials provided with the 15 | // distribution. 16 | // 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
28 | 29 | syntax = "proto2"; 30 | package gogoproto; 31 | 32 | import "google/protobuf/descriptor.proto"; 33 | 34 | option java_package = "com.google.protobuf"; 35 | option java_outer_classname = "GoGoProtos"; 36 | option go_package = "github.com/cosmos/gogoproto/gogoproto"; 37 | 38 | extend google.protobuf.EnumOptions { 39 | optional bool goproto_enum_prefix = 62001; 40 | optional bool goproto_enum_stringer = 62021; 41 | optional bool enum_stringer = 62022; 42 | optional string enum_customname = 62023; 43 | optional bool enumdecl = 62024; 44 | } 45 | 46 | extend google.protobuf.EnumValueOptions { 47 | optional string enumvalue_customname = 66001; 48 | } 49 | 50 | extend google.protobuf.FileOptions { 51 | optional bool goproto_getters_all = 63001; 52 | optional bool goproto_enum_prefix_all = 63002; 53 | optional bool goproto_stringer_all = 63003; 54 | optional bool verbose_equal_all = 63004; 55 | optional bool face_all = 63005; 56 | optional bool gostring_all = 63006; 57 | optional bool populate_all = 63007; 58 | optional bool stringer_all = 63008; 59 | optional bool onlyone_all = 63009; 60 | 61 | optional bool equal_all = 63013; 62 | optional bool description_all = 63014; 63 | optional bool testgen_all = 63015; 64 | optional bool benchgen_all = 63016; 65 | optional bool marshaler_all = 63017; 66 | optional bool unmarshaler_all = 63018; 67 | optional bool stable_marshaler_all = 63019; 68 | 69 | optional bool sizer_all = 63020; 70 | 71 | optional bool goproto_enum_stringer_all = 63021; 72 | optional bool enum_stringer_all = 63022; 73 | 74 | optional bool unsafe_marshaler_all = 63023; 75 | optional bool unsafe_unmarshaler_all = 63024; 76 | 77 | optional bool goproto_extensions_map_all = 63025; 78 | optional bool goproto_unrecognized_all = 63026; 79 | optional bool gogoproto_import = 63027; 80 | optional bool protosizer_all = 63028; 81 | optional bool compare_all = 63029; 82 | optional bool typedecl_all = 63030; 83 | optional bool enumdecl_all = 63031; 84 | 85 | 
optional bool goproto_registration = 63032; 86 | optional bool messagename_all = 63033; 87 | 88 | optional bool goproto_sizecache_all = 63034; 89 | optional bool goproto_unkeyed_all = 63035; 90 | } 91 | 92 | extend google.protobuf.MessageOptions { 93 | optional bool goproto_getters = 64001; 94 | optional bool goproto_stringer = 64003; 95 | optional bool verbose_equal = 64004; 96 | optional bool face = 64005; 97 | optional bool gostring = 64006; 98 | optional bool populate = 64007; 99 | optional bool stringer = 67008; 100 | optional bool onlyone = 64009; 101 | 102 | optional bool equal = 64013; 103 | optional bool description = 64014; 104 | optional bool testgen = 64015; 105 | optional bool benchgen = 64016; 106 | optional bool marshaler = 64017; 107 | optional bool unmarshaler = 64018; 108 | optional bool stable_marshaler = 64019; 109 | 110 | optional bool sizer = 64020; 111 | 112 | optional bool unsafe_marshaler = 64023; 113 | optional bool unsafe_unmarshaler = 64024; 114 | 115 | optional bool goproto_extensions_map = 64025; 116 | optional bool goproto_unrecognized = 64026; 117 | 118 | optional bool protosizer = 64028; 119 | optional bool compare = 64029; 120 | 121 | optional bool typedecl = 64030; 122 | 123 | optional bool messagename = 64033; 124 | 125 | optional bool goproto_sizecache = 64034; 126 | optional bool goproto_unkeyed = 64035; 127 | } 128 | 129 | extend google.protobuf.FieldOptions { 130 | optional bool nullable = 65001; 131 | optional bool embed = 65002; 132 | optional string customtype = 65003; 133 | optional string customname = 65004; 134 | optional string jsontag = 65005; 135 | optional string moretags = 65006; 136 | optional string casttype = 65007; 137 | optional string castkey = 65008; 138 | optional string castvalue = 65009; 139 | 140 | optional bool stdtime = 65010; 141 | optional bool stdduration = 65011; 142 | optional bool wktpointer = 65012; 143 | 144 | optional string castrepeated = 65013; 145 | } 146 | 
-------------------------------------------------------------------------------- /proto/rust-vendored/ibc/core/commitment/v1/commitment.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package ibc.core.commitment.v1; 4 | 5 | option go_package = "github.com/cosmos/ibc-go/v8/modules/core/23-commitment/types"; 6 | 7 | import "gogoproto/gogo.proto"; 8 | import "cosmos/ics23/v1/proofs.proto"; 9 | 10 | // MerkleRoot defines a merkle root hash. 11 | // In the Cosmos SDK, the AppHash of a block header becomes the root. 12 | message MerkleRoot { 13 | option (gogoproto.goproto_getters) = false; 14 | 15 | bytes hash = 1; 16 | } 17 | 18 | // MerklePrefix is merkle path prefixed to the key. 19 | // The constructed key from the Path and the key will be append(Path.KeyPath, 20 | // append(Path.KeyPrefix, key...)) 21 | message MerklePrefix { 22 | bytes key_prefix = 1; 23 | } 24 | 25 | // MerklePath is the path used to verify commitment proofs, which can be an 26 | // arbitrary structured object (defined by a commitment type). 27 | // MerklePath is represented from root-to-leaf 28 | message MerklePath { 29 | option (gogoproto.goproto_stringer) = false; 30 | 31 | repeated string key_path = 1; 32 | } 33 | 34 | // MerkleProof is a wrapper type over a chain of CommitmentProofs. 35 | // It demonstrates membership or non-membership for an element or set of 36 | // elements, verifiable in conjunction with a known commitment root. Proofs 37 | // should be succinct. 
38 | // MerkleProofs are ordered from leaf-to-root 39 | message MerkleProof { 40 | repeated cosmos.ics23.v1.CommitmentProof proofs = 1; 41 | } 42 | -------------------------------------------------------------------------------- /src/cache.rs: -------------------------------------------------------------------------------- 1 | use std::{any::Any, collections::BTreeMap, sync::Arc}; 2 | 3 | use tendermint::abci; 4 | 5 | use crate::{ 6 | store::{multistore::MultistoreConfig, substore::SubstoreConfig}, 7 | StateWrite, 8 | }; 9 | 10 | /// A cache of changes to the state of the blockchain. 11 | /// 12 | /// A [`StateDelta`](crate::StateDelta) is `Cache` above a `StateRead`. 13 | #[derive(Default, Debug)] 14 | pub struct Cache { 15 | /// Unwritten changes to the consensus-critical state (stored in the JMT). 16 | pub(crate) unwritten_changes: BTreeMap>>, 17 | /// Unwritten changes to non-consensus-critical state (stored in the nonverifiable storage). 18 | pub(crate) nonverifiable_changes: BTreeMap, Option>>, 19 | /// Unwritten changes to the object store. A `None` value means a deletion. 20 | pub(crate) ephemeral_objects: BTreeMap<&'static str, Option>>, 21 | /// A list of ABCI events that occurred while building this set of state changes. 22 | pub(crate) events: Vec, 23 | } 24 | 25 | impl Cache { 26 | /// Inspect the cache of unwritten changes to the verifiable state. 27 | pub fn unwritten_changes(&self) -> &BTreeMap>> { 28 | &self.unwritten_changes 29 | } 30 | 31 | /// Inspect the cache of unwritten changes to the nonverifiable state. 32 | pub fn nonverifiable_changes(&self) -> &BTreeMap, Option>> { 33 | &self.nonverifiable_changes 34 | } 35 | 36 | /// Merge the given cache with this one, taking its writes in place of ours. 37 | pub fn merge(&mut self, other: Cache) { 38 | // One might ask, why does this exist separately from `apply_to`? 
The 39 | // answer is that `apply_to` takes a `StateWrite`, so we'd have to have 40 | // `Cache: StateWrite`, and that implies `Cache: StateRead`, but the 41 | // `StateRead` trait assumes asynchronous access, and in any case, we 42 | // probably don't want to be reading directly from a `Cache` (?) 43 | self.unwritten_changes.extend(other.unwritten_changes); 44 | self.nonverifiable_changes 45 | .extend(other.nonverifiable_changes); 46 | self.ephemeral_objects.extend(other.ephemeral_objects); 47 | self.events.extend(other.events); 48 | } 49 | 50 | /// Consume this cache, applying its writes to the given state. 51 | pub fn apply_to(self, mut state: S) { 52 | for (key, value) in self.unwritten_changes { 53 | if let Some(value) = value { 54 | state.put_raw(key, value); 55 | } else { 56 | state.delete(key); 57 | } 58 | } 59 | 60 | for (key, value) in self.nonverifiable_changes { 61 | if let Some(value) = value { 62 | state.nonverifiable_put_raw(key, value); 63 | } else { 64 | state.nonverifiable_delete(key); 65 | } 66 | } 67 | 68 | // It's important to use object_merge here, so that we don't re-box all 69 | // of the objects, causing downcasting to fail. 70 | state.object_merge(self.ephemeral_objects); 71 | 72 | for event in self.events { 73 | state.record(event); 74 | } 75 | } 76 | 77 | /// Returns `true` if there are cached writes on top of the snapshot, and `false` otherwise. 78 | pub fn is_dirty(&self) -> bool { 79 | !(self.unwritten_changes.is_empty() 80 | && self.nonverifiable_changes.is_empty() 81 | && self.ephemeral_objects.is_empty()) 82 | } 83 | 84 | /// Extracts and returns the ABCI events contained in this cache. 85 | pub fn take_events(&mut self) -> Vec { 86 | std::mem::take(&mut self.events) 87 | } 88 | 89 | /// Consumes a `Cache` and returns a map of `SubstoreConfig` to `Cache` that 90 | /// corresponds to changes belonging to each substore. The keys in each `Cache` 91 | /// are truncated to remove the substore prefix. 
92 | pub fn shard_by_prefix( 93 | self, 94 | prefixes: &MultistoreConfig, 95 | ) -> BTreeMap, Self> { 96 | let mut changes_by_substore = BTreeMap::new(); 97 | for (key, some_value) in self.unwritten_changes.into_iter() { 98 | let (truncated_key, substore_config) = prefixes.route_key_str(&key); 99 | changes_by_substore 100 | .entry(substore_config) 101 | .or_insert_with(Cache::default) 102 | .unwritten_changes 103 | .insert(truncated_key.to_string(), some_value); 104 | } 105 | 106 | for (key, some_value) in self.nonverifiable_changes { 107 | let (truncated_key, substore_config) = prefixes.route_key_bytes(&key); 108 | changes_by_substore 109 | .entry(substore_config) 110 | .or_insert_with(Cache::default) 111 | .nonverifiable_changes 112 | .insert(truncated_key.to_vec(), some_value); 113 | } 114 | changes_by_substore 115 | } 116 | 117 | pub(crate) fn clone_changes(&self) -> Self { 118 | Self { 119 | unwritten_changes: self.unwritten_changes.clone(), 120 | nonverifiable_changes: self.nonverifiable_changes.clone(), 121 | ephemeral_objects: Default::default(), 122 | events: Default::default(), 123 | } 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/delta.rs: -------------------------------------------------------------------------------- 1 | use std::{any::Any, sync::Arc}; 2 | 3 | use futures::StreamExt; 4 | use parking_lot::RwLock; 5 | use tendermint::abci; 6 | 7 | use crate::{ 8 | future::{ 9 | CacheFuture, StateDeltaNonconsensusPrefixRawStream, StateDeltaNonconsensusRangeRawStream, 10 | StateDeltaPrefixKeysStream, StateDeltaPrefixRawStream, 11 | }, 12 | utils, Cache, EscapedByteSlice, StateRead, StateWrite, 13 | }; 14 | 15 | /// An arbitrarily-deeply nested stack of delta updates to an underlying state. 16 | /// 17 | /// This API allows exploring a tree of possible execution paths concurrently, 18 | /// before finally selecting one and applying it to the underlying state. 
19 | /// 20 | /// Using this API requires understanding its invariants. 21 | /// 22 | /// On creation, `StateDelta::new` takes ownership of a `StateRead + StateWrite` 23 | /// instance, acquiring a "write lock" over the underlying state (since `&mut S` 24 | /// is `StateWrite` if `S: StateWrite`, it's possible to pass a unique 25 | /// reference). 26 | /// 27 | /// The resulting `StateDelta` instance is a "leaf" state, and can be used for 28 | /// reads and writes, following the some execution path. 29 | /// 30 | /// When two potential execution paths diverge, `delta.fork()` can be used to 31 | /// fork the state update. The new forked `StateDelta` will include all 32 | /// previous state writes made to the original (and its ancestors). Any writes 33 | /// made to the original `StateDelta` after `fork()` is called will not be seen 34 | /// by the forked state. 35 | /// 36 | /// Finally, after some execution path has been selected, calling 37 | /// `delta.apply()` on one of the possible state updates will commit the changes 38 | /// to the underlying state instance, and invalidate all other delta updates in 39 | /// the same family. It is a programming error to use the other delta updates 40 | /// after `apply()` has been called, but ideally this should not be a problem in 41 | /// practice: the API is intended to explore a tree of possible execution paths; 42 | /// once one has been selected, the others should be discarded. 43 | #[derive(Debug)] 44 | pub struct StateDelta { 45 | /// The underlying state instance. 46 | /// 47 | /// The Arc<_> allows it to be shared between different stacks of delta updates, 48 | /// and the RwLock> allows it to be taken out when it's time to commit 49 | /// the changes from one of the stacks. 50 | state: Arc>>, 51 | /// A stack of intermediate delta updates, with the "top" layers first. 
52 | /// 53 | /// We store all the layers directly, rather than using a recursive structure, 54 | /// so that the type doesn't depend on how many layers are involved. We're only 55 | /// duplicating the Arc<_>, so this should be cheap. 56 | layers: Vec>>>, 57 | /// The final delta update in the stack, the one we're currently working on. 58 | /// Storing this separately allows us to avoid lock contention during writes. 59 | /// In fact, this data shouldn't usually be shared at all; the only reason it's 60 | /// wrapped this way is so that prefix streams can have 'static lifetimes. 61 | /// We option-wrap it so it can be chained with the layers; it will never be None. 62 | leaf_cache: Arc>>, 63 | } 64 | 65 | impl StateDelta { 66 | /// Create a new tree of possible updates to an underlying `state`. 67 | pub fn new(state: S) -> Self { 68 | Self { 69 | state: Arc::new(RwLock::new(Some(state))), 70 | layers: Vec::default(), 71 | leaf_cache: Arc::new(RwLock::new(Some(Cache::default()))), 72 | } 73 | } 74 | 75 | /// Fork execution, returning a new child state that includes all previous changes. 76 | pub fn fork(&mut self) -> Self { 77 | // If we have writes in the leaf cache, we'll move them to a new layer, 78 | // ensuring that the new child only sees writes made to this state 79 | // *before* fork was called, and not after. 80 | // 81 | // Doing this only when the leaf cache is dirty means that we don't 82 | // add empty layers in repeated fork() calls without intervening writes. 
83 | if self 84 | .leaf_cache 85 | .read() 86 | .as_ref() 87 | .expect("unable to get ref to leaf cache, storage not initialized?") 88 | .is_dirty() 89 | { 90 | let new_layer = std::mem::replace( 91 | &mut self.leaf_cache, 92 | Arc::new(RwLock::new(Some(Cache::default()))), 93 | ); 94 | self.layers.push(new_layer); 95 | } 96 | 97 | Self { 98 | state: self.state.clone(), 99 | layers: self.layers.clone(), 100 | leaf_cache: Arc::new(RwLock::new(Some(Cache::default()))), 101 | } 102 | } 103 | 104 | /// Flatten all changes in this branch of the tree into a single [`Cache`], 105 | /// invalidating all other branches of the tree and releasing the underlying 106 | /// state back to the caller. 107 | /// 108 | /// The [`apply`](Self::apply) method is a convenience wrapper around this 109 | /// that applies the changes to the underlying state. 110 | pub fn flatten(self) -> (S, Cache) { 111 | tracing::trace!("flattening branch"); 112 | // Take ownership of the underlying state, immediately invalidating all 113 | // other delta stacks in the same family. 114 | let state = self 115 | .state 116 | .write() 117 | .take() 118 | .expect("apply must be called only once"); 119 | 120 | // Flatten the intermediate layers into a single cache, applying them from oldest 121 | // (bottom) to newest (top), so that newer writes clobber old ones. 122 | let mut changes = Cache::default(); 123 | for layer in self.layers { 124 | let cache = layer 125 | .write() 126 | .take() 127 | .expect("cache must not have already been applied"); 128 | changes.merge(cache); 129 | } 130 | // Last, apply the changes in the leaf cache. 
131 | changes.merge( 132 | self.leaf_cache 133 | .write() 134 | .take() 135 | .expect("unable to take leaf cache, was it already applied?"), 136 | ); 137 | 138 | (state, changes) 139 | } 140 | } 141 | 142 | impl StateDelta { 143 | /// Apply all changes in this branch of the tree to the underlying state, 144 | /// releasing it back to the caller and invalidating all other branches of 145 | /// the tree. 146 | pub fn apply(self) -> (S, Vec) { 147 | let (mut state, mut changes) = self.flatten(); 148 | let events = changes.take_events(); 149 | 150 | // Apply the flattened changes to the underlying state. 151 | changes.apply_to(&mut state); 152 | 153 | // Finally, return ownership of the state back to the caller. 154 | (state, events) 155 | } 156 | } 157 | 158 | impl StateDelta> { 159 | pub fn try_apply(self) -> anyhow::Result<(S, Vec)> { 160 | let (arc_state, mut changes) = self.flatten(); 161 | let events = std::mem::take(&mut changes.events); 162 | 163 | if let Ok(mut state) = Arc::try_unwrap(arc_state) { 164 | // Apply the flattened changes to the underlying state. 165 | changes.apply_to(&mut state); 166 | 167 | // Finally, return ownership of the state back to the caller. 168 | Ok((state, events)) 169 | } else { 170 | Err(anyhow::anyhow!("did not have unique ownership of Arc")) 171 | } 172 | } 173 | } 174 | 175 | impl StateRead for StateDelta { 176 | type GetRawFut = CacheFuture; 177 | type PrefixRawStream = StateDeltaPrefixRawStream; 178 | type PrefixKeysStream = StateDeltaPrefixKeysStream; 179 | type NonconsensusPrefixRawStream = 180 | StateDeltaNonconsensusPrefixRawStream; 181 | type NonconsensusRangeRawStream = 182 | StateDeltaNonconsensusRangeRawStream; 183 | 184 | fn get_raw(&self, key: &str) -> Self::GetRawFut { 185 | // Check if we have a cache hit in the leaf cache. 
186 | if let Some(entry) = self 187 | .leaf_cache 188 | .read() 189 | .as_ref() 190 | .expect("delta must not have been applied") 191 | .unwritten_changes 192 | .get(key) 193 | { 194 | return CacheFuture::hit(entry.clone()); 195 | } 196 | 197 | // Iterate through the stack, top to bottom, to see if we have a cache hit. 198 | for layer in self.layers.iter().rev() { 199 | if let Some(entry) = layer 200 | .read() 201 | .as_ref() 202 | .expect("delta must not have been applied") 203 | .unwritten_changes 204 | .get(key) 205 | { 206 | return CacheFuture::hit(entry.clone()); 207 | } 208 | } 209 | 210 | // If we got here, the key must be in the underlying state or not present at all. 211 | CacheFuture::miss( 212 | self.state 213 | .read() 214 | .as_ref() 215 | .expect("delta must not have been applied") 216 | .get_raw(key), 217 | ) 218 | } 219 | 220 | fn nonverifiable_get_raw(&self, key: &[u8]) -> Self::GetRawFut { 221 | // Check if we have a cache hit in the leaf cache. 222 | if let Some(entry) = self 223 | .leaf_cache 224 | .read() 225 | .as_ref() 226 | .expect("delta must not have been applied") 227 | .nonverifiable_changes 228 | .get(key) 229 | { 230 | return CacheFuture::hit(entry.clone()); 231 | } 232 | 233 | // Iterate through the stack, top to bottom, to see if we have a cache hit. 234 | for layer in self.layers.iter().rev() { 235 | if let Some(entry) = layer 236 | .read() 237 | .as_ref() 238 | .expect("delta must not have been applied") 239 | .nonverifiable_changes 240 | .get(key) 241 | { 242 | return CacheFuture::hit(entry.clone()); 243 | } 244 | } 245 | 246 | // If we got here, the key must be in the underlying state or not present at all. 247 | CacheFuture::miss( 248 | self.state 249 | .read() 250 | .as_ref() 251 | .expect("delta must not have been applied") 252 | .nonverifiable_get_raw(key), 253 | ) 254 | } 255 | 256 | fn object_type(&self, key: &'static str) -> Option { 257 | // Check if we have a cache hit in the leaf cache. 
258 | if let Some(entry) = self 259 | .leaf_cache 260 | .read() 261 | .as_ref() 262 | .expect("delta must not have been applied") 263 | .ephemeral_objects 264 | .get(key) 265 | { 266 | // We have to explicitly call `Any::type_id(&**v)` here because this ensures that we are 267 | // asking for the type of the `Any` *inside* the `Box`, rather than the type of 268 | // `Box` itself. 269 | return entry.as_ref().map(|v| std::any::Any::type_id(&**v)); 270 | } 271 | 272 | // Iterate through the stack, top to bottom, to see if we have a cache hit. 273 | for layer in self.layers.iter().rev() { 274 | if let Some(entry) = layer 275 | .read() 276 | .as_ref() 277 | .expect("delta must not have been applied") 278 | .ephemeral_objects 279 | .get(key) 280 | { 281 | // We have to explicitly call `Any::type_id(&**v)` here because this ensures that we are 282 | // asking for the type of the `Any` *inside* the `Box`, rather than the type of 283 | // `Box` itself. 284 | return entry.as_ref().map(|v| std::any::Any::type_id(&**v)); 285 | } 286 | } 287 | 288 | // Fall through to the underlying store. 289 | self.state 290 | .read() 291 | .as_ref() 292 | .expect("delta must not have been applied") 293 | .object_type(key) 294 | } 295 | 296 | fn object_get(&self, key: &'static str) -> Option { 297 | // Check if we have a cache hit in the leaf cache. 298 | if let Some(entry) = self 299 | .leaf_cache 300 | .read() 301 | .as_ref() 302 | .expect("delta must not have been applied") 303 | .ephemeral_objects 304 | .get(key) 305 | { 306 | return entry 307 | .as_ref() 308 | .map(|v| { 309 | v.downcast_ref().unwrap_or_else(|| panic!("unexpected type for key \"{key}\" in `StateDelta::object_get`: expected type {}", std::any::type_name::())) 310 | }) 311 | .cloned(); 312 | } 313 | 314 | // Iterate through the stack, top to bottom, to see if we have a cache hit. 
315 | for layer in self.layers.iter().rev() { 316 | if let Some(entry) = layer 317 | .read() 318 | .as_ref() 319 | .expect("delta must not have been applied") 320 | .ephemeral_objects 321 | .get(key) 322 | { 323 | return entry 324 | .as_ref() 325 | .map(|v| { 326 | v.downcast_ref().unwrap_or_else(|| panic!("unexpected type for key \"{key}\" in `StateDelta::object_get`: expected type {}", std::any::type_name::())) 327 | }).cloned(); 328 | } 329 | } 330 | 331 | // Fall through to the underlying store. 332 | self.state 333 | .read() 334 | .as_ref() 335 | .expect("delta must not have been applied") 336 | .object_get(key) 337 | } 338 | 339 | fn prefix_raw(&self, prefix: &str) -> Self::PrefixRawStream { 340 | let underlying = self 341 | .state 342 | .read() 343 | .as_ref() 344 | .expect("delta must not have been applied") 345 | .prefix_raw(prefix) 346 | .peekable(); 347 | StateDeltaPrefixRawStream { 348 | underlying, 349 | layers: self.layers.clone(), 350 | leaf_cache: self.leaf_cache.clone(), 351 | last_key: None, 352 | prefix: prefix.to_owned(), 353 | } 354 | } 355 | 356 | fn prefix_keys(&self, prefix: &str) -> Self::PrefixKeysStream { 357 | let underlying = self 358 | .state 359 | .read() 360 | .as_ref() 361 | .expect("delta must not have been applied") 362 | .prefix_keys(prefix) 363 | .peekable(); 364 | StateDeltaPrefixKeysStream { 365 | underlying, 366 | layers: self.layers.clone(), 367 | leaf_cache: self.leaf_cache.clone(), 368 | last_key: None, 369 | prefix: prefix.to_owned(), 370 | } 371 | } 372 | 373 | fn nonverifiable_prefix_raw(&self, prefix: &[u8]) -> Self::NonconsensusPrefixRawStream { 374 | let underlying = self 375 | .state 376 | .read() 377 | .as_ref() 378 | .expect("delta must not have been applied") 379 | .nonverifiable_prefix_raw(prefix) 380 | .peekable(); 381 | StateDeltaNonconsensusPrefixRawStream { 382 | underlying, 383 | layers: self.layers.clone(), 384 | leaf_cache: self.leaf_cache.clone(), 385 | last_key: None, 386 | prefix: prefix.to_vec(), 387 
| } 388 | } 389 | 390 | fn nonverifiable_range_raw( 391 | &self, 392 | prefix: Option<&[u8]>, 393 | range: impl std::ops::RangeBounds>, 394 | ) -> anyhow::Result { 395 | let (range, (start, end)) = utils::convert_bounds(range)?; 396 | let underlying = self 397 | .state 398 | .read() 399 | .as_ref() 400 | .expect("delta must not have been applied") 401 | .nonverifiable_range_raw(prefix, range)? 402 | .peekable(); 403 | Ok(StateDeltaNonconsensusRangeRawStream { 404 | underlying, 405 | layers: self.layers.clone(), 406 | leaf_cache: self.leaf_cache.clone(), 407 | last_key: None, 408 | prefix: prefix.map(|p| p.to_vec()), 409 | range: (start, end), 410 | }) 411 | } 412 | } 413 | 414 | impl StateWrite for StateDelta { 415 | fn put_raw(&mut self, key: String, value: jmt::OwnedValue) { 416 | self.leaf_cache 417 | .write() 418 | .as_mut() 419 | .expect("delta must not have been applied") 420 | .unwritten_changes 421 | .insert(key, Some(value)); 422 | } 423 | 424 | fn delete(&mut self, key: String) { 425 | self.leaf_cache 426 | .write() 427 | .as_mut() 428 | .expect("delta must not have been applied") 429 | .unwritten_changes 430 | .insert(key, None); 431 | } 432 | 433 | fn nonverifiable_delete(&mut self, key: Vec) { 434 | tracing::trace!(key = ?EscapedByteSlice(&key), "deleting key"); 435 | self.leaf_cache 436 | .write() 437 | .as_mut() 438 | .expect("delta must not have been applied") 439 | .nonverifiable_changes 440 | .insert(key, None); 441 | } 442 | 443 | fn nonverifiable_put_raw(&mut self, key: Vec, value: Vec) { 444 | tracing::trace!(key = ?EscapedByteSlice(&key), value = ?EscapedByteSlice(&value), "insert nonverifiable change"); 445 | self.leaf_cache 446 | .write() 447 | .as_mut() 448 | .expect("delta must not have been applied") 449 | .nonverifiable_changes 450 | .insert(key, Some(value)); 451 | } 452 | 453 | fn object_put(&mut self, key: &'static str, value: T) { 454 | if let Some(previous_type) = self.object_type(key) { 455 | if std::any::TypeId::of::() != 
previous_type { 456 | panic!( 457 | "unexpected type for key \"{key}\" in `StateDelta::object_put`: expected type {expected}", 458 | expected = std::any::type_name::(), 459 | ); 460 | } 461 | } 462 | self.leaf_cache 463 | .write() 464 | .as_mut() 465 | .expect("delta must not have been applied") 466 | .ephemeral_objects 467 | .insert(key, Some(Box::new(value))); 468 | } 469 | 470 | fn object_delete(&mut self, key: &'static str) { 471 | self.leaf_cache 472 | .write() 473 | .as_mut() 474 | .expect("delta must not have been applied") 475 | .ephemeral_objects 476 | .insert(key, None); 477 | } 478 | 479 | fn object_merge( 480 | &mut self, 481 | objects: std::collections::BTreeMap<&'static str, Option>>, 482 | ) { 483 | self.leaf_cache 484 | .write() 485 | .as_mut() 486 | .expect("delta must not have been applied") 487 | .ephemeral_objects 488 | .extend(objects); 489 | } 490 | 491 | fn record(&mut self, event: abci::Event) { 492 | self.leaf_cache 493 | .write() 494 | .as_mut() 495 | .expect("delta must not have been applied") 496 | .events 497 | .push(event) 498 | } 499 | } 500 | 501 | /// Extension trait providing `try_begin_transaction()` on `Arc>`. 502 | pub trait ArcStateDeltaExt: Sized { 503 | type S: StateRead; 504 | /// Attempts to begin a transaction on this `Arc`, returning `None` if the `Arc` is shared. 505 | fn try_begin_transaction(&'_ mut self) -> Option>>; 506 | } 507 | 508 | impl ArcStateDeltaExt for Arc> { 509 | type S = S; 510 | fn try_begin_transaction(&'_ mut self) -> Option>> { 511 | Arc::get_mut(self).map(StateDelta::new) 512 | } 513 | } 514 | -------------------------------------------------------------------------------- /src/escaped_byte_slice.rs: -------------------------------------------------------------------------------- 1 | /// A wrapper type for a byte slice that implements `Debug` by escaping 2 | /// non-printable bytes. 
///
/// This is exposed as part of the public API for convenience of downstream
/// users' debugging of state accesses.
pub struct EscapedByteSlice<'a>(pub &'a [u8]);

impl<'a> std::fmt::Debug for EscapedByteSlice<'a> {
    /// Formats the slice as a Rust-style byte-string literal, e.g. `b"a\n\x00"`:
    /// printable ASCII passes through, common control bytes use their short
    /// escapes, and everything else is rendered as `\xNN`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "b\"")?;
        for &b in self.0 {
            // https://doc.rust-lang.org/reference/tokens.html#byte-escapes
            #[allow(clippy::manual_range_contains)]
            if b == b'\n' {
                write!(f, "\\n")?;
            } else if b == b'\r' {
                write!(f, "\\r")?;
            } else if b == b'\t' {
                write!(f, "\\t")?;
            } else if b == b'\\' || b == b'"' {
                write!(f, "\\{}", b as char)?;
            } else if b == b'\0' {
                write!(f, "\\0")?;
            // ASCII printable
            } else if b >= 0x20 && b < 0x7f {
                write!(f, "{}", b as char)?;
            } else {
                write!(f, "\\x{:02x}", b)?;
            }
        }
        write!(f, "\"")?;
        Ok(())
    }
}

// ---- src/lib.rs (crate-level docs; rendered as `//` here because this dump
// chunk places them after items — they are `//!` inner docs in the original) ----
// Storage and management of chain state, backed by Jellyfish Merkle Trees and RocksDB.
//
// This crate provides a versioned, verifiable key-value store that also
// supports lightweight, copy-on-write snapshots and transactional semantics.
// The [`Storage`] type is a handle for an instance of a backing store,
// implemented using RocksDB. The storage records a sequence of versioned
// [`Snapshot`]s. The [`Snapshot`] type is a lightweight snapshot of a particular
version of the chain state. 9 | //! 10 | //! Each [`Snapshot`] instance can also be used as the basis for a copy-on-write 11 | //! fork to build up changes before committing them to persistent storage. The 12 | //! [`StateDelta`] type collects a group of writes, which can then be applied to 13 | //! the (in-memory) [`StateDelta`] overlay. Finally, the changes accumulated in the 14 | //! [`StateDelta`] instance can be committed to the persistent [`Storage`]. 15 | //! 16 | //! Reads are performed with the [`StateRead`] trait, implemented by both 17 | //! [`Snapshot`] and [`StateDelta`], and reflect any currently cached writes. 18 | //! Writes are performed with the [`StateWrite`] trait, which is only 19 | //! implemented for [`StateDelta`]. 20 | //! 21 | //! The storage system provides three data stores: 22 | //! 23 | //! * A verifiable key-value store, with UTF-8 keys and byte values, backed by 24 | //! the Jellyfish Merkle Tree. The JMT is a sparse merkle tree that records 25 | //! hashed keys, so we also record an index of the keys themselves to allow 26 | //! range queries on keys rather than key hashes. This index, however, is not 27 | //! part of the verifiable consensus state. 28 | //! 29 | //! * A secondary, non-verifiable key-value store with byte keys and byte 30 | //! values, backed directly by RocksDB. This is intended for use building 31 | //! application-specific indexes of the verifiable consensus state. 32 | //! 33 | //! * A tertiary, in-memory object store. This is intended for use implementing 34 | //! accumulators, like lists of data to be batch-processed at the end of the 35 | //! block. The object store clones on read to prevent violations of 36 | //! transactional semantics, so it should be used with immutable data structures 37 | //! like those in the `im` crate that implement copy-on-write behavior 38 | //! internally. 39 | //! 40 | //! The storage system also supports prefixed "substores", somewhat similar to 41 | //! 
the Cosmos SDK's multistore design. Each substore has a separate JMT, whose 42 | //! root hash is written into the base store under the prefix. This allows use 43 | //! cases like storing IBC data in a subtree. The substore's non-verifiable 44 | //! store is also stored in a separate RocksDB column family, allowing storage 45 | //! optimizations. 46 | //! 47 | //! Remember that the chain state is a public API. Mapping from raw byte values 48 | //! to typed data should be accomplished by means of extension traits. For 49 | //! instance, the `penumbra_proto` crate provides an extension trait to 50 | //! automatically (de)serialize into proto or domain types, allowing its use as 51 | //! an object store. 52 | //! 53 | //! With the `rpc` feature enabled, this crate also provides a GRPC interface to 54 | //! the key-value store using Tonic. 55 | #![deny(clippy::unwrap_used)] 56 | // Requires nightly. 57 | #![cfg_attr(docsrs, feature(doc_auto_cfg))] 58 | // We use `HashMap`s opportunistically. 59 | #![allow(clippy::disallowed_types)] 60 | 61 | mod cache; 62 | mod delta; 63 | mod escaped_byte_slice; 64 | mod metrics; 65 | mod read; 66 | mod snapshot; 67 | mod snapshot_cache; 68 | mod storage; 69 | mod store; 70 | #[cfg(test)] 71 | mod tests; 72 | mod utils; 73 | mod write; 74 | mod write_batch; 75 | 76 | #[cfg(feature = "metrics")] 77 | pub use crate::metrics::register_metrics; 78 | pub use cache::Cache; 79 | pub use delta::{ArcStateDeltaExt, StateDelta}; 80 | pub use escaped_byte_slice::EscapedByteSlice; 81 | pub use jmt::{ics23_spec, RootHash}; 82 | pub use read::StateRead; 83 | pub use snapshot::Snapshot; 84 | pub use storage::{Storage, TempStorage}; 85 | pub use write::StateWrite; 86 | pub use write_batch::StagedWriteBatch; 87 | // We expose an internal to facilitate downstreams 88 | // building debug tooling. 
89 | pub use store::substore::DbNodeKey; 90 | 91 | pub mod future; 92 | 93 | #[cfg(feature = "rpc")] 94 | pub mod rpc; 95 | 96 | #[cfg(feature = "proto")] 97 | pub mod proto; 98 | -------------------------------------------------------------------------------- /src/metrics.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "metrics")] 2 | //! Crate-specific metrics functionality. 3 | //! 4 | //! This module re-exports the contents of the `metrics` crate. This is 5 | //! effectively a way to monkey-patch the functions in this module into the 6 | //! `metrics` crate, at least from the point of view of the other code in this 7 | //! crate. 8 | //! 9 | //! Code in this crate that wants to use metrics should `use crate::metrics;`, 10 | //! so that this module shadows the `metrics` crate. 11 | //! 12 | //! This trick is probably good to avoid in general, because it could be 13 | //! confusing, but in this limited case, it seems like a clean option. 14 | 15 | pub use metrics::*; 16 | 17 | /// Registers all metrics used by this crate. 
18 | pub fn register_metrics() { 19 | describe_histogram!( 20 | STORAGE_GET_RAW_DURATION, 21 | Unit::Seconds, 22 | "The duration of a get_raw request" 23 | ); 24 | describe_histogram!( 25 | STORAGE_NONCONSENSUS_GET_RAW_DURATION, 26 | Unit::Seconds, 27 | "The duration of a nonverifiable_get_raw request" 28 | ); 29 | } 30 | 31 | pub const STORAGE_GET_RAW_DURATION: &str = "cnidarium_get_raw_duration_seconds"; 32 | pub const STORAGE_NONCONSENSUS_GET_RAW_DURATION: &str = 33 | "cnidarium_nonverifiable_get_raw_duration_seconds"; 34 | -------------------------------------------------------------------------------- /src/proto.rs: -------------------------------------------------------------------------------- 1 | // Autogen code isn't clippy clean: 2 | #[allow(clippy::unwrap_used)] 3 | pub mod v1 { 4 | include!("gen/penumbra.cnidarium.v1.rs"); 5 | include!("gen/penumbra.cnidarium.v1.serde.rs"); 6 | } 7 | 8 | // https://github.com/penumbra-zone/penumbra/issues/3038#issuecomment-1722534133 9 | pub const FILE_DESCRIPTOR_SET: &[u8] = include_bytes!("gen/proto_descriptor.bin.no_lfs"); 10 | -------------------------------------------------------------------------------- /src/read.rs: -------------------------------------------------------------------------------- 1 | use std::{any::Any, future::Future, ops::RangeBounds, sync::Arc}; 2 | 3 | use anyhow::Result; 4 | use futures::Stream; 5 | 6 | /// Read access to chain state. 7 | pub trait StateRead: Send + Sync { 8 | type GetRawFut: Future>>> + Send + 'static; 9 | type PrefixRawStream: Stream)>> + Send + 'static; 10 | type PrefixKeysStream: Stream> + Send + 'static; 11 | type NonconsensusPrefixRawStream: Stream, Vec)>> + Send + 'static; 12 | type NonconsensusRangeRawStream: Stream, Vec)>> + Send + 'static; 13 | 14 | /// Gets a value from the verifiable key-value store as raw bytes. 15 | /// 16 | /// Users should generally prefer to use `get` or `get_proto` from an extension trait. 
17 | fn get_raw(&self, key: &str) -> Self::GetRawFut; 18 | 19 | /// Gets a byte value from the non-verifiable key-value store. 20 | /// 21 | /// This is intended for application-specific indexes of the verifiable 22 | /// consensus state, rather than for use as a primary data storage method. 23 | fn nonverifiable_get_raw(&self, key: &[u8]) -> Self::GetRawFut; 24 | 25 | /// Gets an object from the ephemeral key-object store. 26 | /// 27 | /// This is intended to allow application components to build up batched 28 | /// data transactionally, ensuring that a transaction's contributions to 29 | /// some batched data are only included if the entire transaction executed 30 | /// successfully. This data is not persisted to the `Storage` during 31 | /// `commit`. 32 | /// 33 | /// # Returns 34 | /// 35 | /// - `Some(&T)` if a value of type `T` was present at `key`. 36 | /// - `None` if `key` was not present. 37 | /// 38 | /// # Panics 39 | /// 40 | /// If there *is* a value at `key` but it is not of the type requested. 41 | fn object_get(&self, key: &'static str) -> Option; 42 | 43 | /// Gets the [`TypeId`] of the object stored at `key` in the ephemeral key-object store, if any 44 | /// is present. 45 | fn object_type(&self, key: &'static str) -> Option; 46 | 47 | /// Retrieve all values for keys matching a prefix from the verifiable key-value store, as raw bytes. 48 | /// 49 | /// Users should generally prefer to use `prefix` or `prefix_proto` from an extension trait. 50 | fn prefix_raw(&self, prefix: &str) -> Self::PrefixRawStream; 51 | 52 | /// Retrieve all keys (but not values) matching a prefix from the verifiable key-value store. 53 | fn prefix_keys(&self, prefix: &str) -> Self::PrefixKeysStream; 54 | 55 | /// Retrieve all values for keys matching a prefix from the non-verifiable key-value store, as raw bytes. 56 | /// 57 | /// Users should generally prefer to use wrapper methods in an extension trait. 
58 | fn nonverifiable_prefix_raw(&self, prefix: &[u8]) -> Self::NonconsensusPrefixRawStream; 59 | 60 | /// Retrieve all values for keys in a range from the non-verifiable key-value store, as raw bytes. 61 | /// This method does not support inclusive ranges, and will return an error if passed one. 62 | /// 63 | /// Users should generally prefer to use wrapper methods in an extension trait. 64 | fn nonverifiable_range_raw( 65 | &self, 66 | prefix: Option<&[u8]>, 67 | range: impl RangeBounds>, 68 | ) -> Result; 69 | } 70 | 71 | impl<'a, S: StateRead + Send + Sync> StateRead for &'a S { 72 | type GetRawFut = S::GetRawFut; 73 | type PrefixRawStream = S::PrefixRawStream; 74 | type PrefixKeysStream = S::PrefixKeysStream; 75 | type NonconsensusPrefixRawStream = S::NonconsensusPrefixRawStream; 76 | type NonconsensusRangeRawStream = S::NonconsensusRangeRawStream; 77 | 78 | fn get_raw(&self, key: &str) -> Self::GetRawFut { 79 | (**self).get_raw(key) 80 | } 81 | 82 | fn prefix_raw(&self, prefix: &str) -> S::PrefixRawStream { 83 | (**self).prefix_raw(prefix) 84 | } 85 | 86 | fn prefix_keys(&self, prefix: &str) -> S::PrefixKeysStream { 87 | (**self).prefix_keys(prefix) 88 | } 89 | 90 | fn nonverifiable_prefix_raw(&self, prefix: &[u8]) -> S::NonconsensusPrefixRawStream { 91 | (**self).nonverifiable_prefix_raw(prefix) 92 | } 93 | 94 | fn nonverifiable_range_raw( 95 | &self, 96 | prefix: Option<&[u8]>, 97 | range: impl std::ops::RangeBounds>, 98 | ) -> anyhow::Result { 99 | (**self).nonverifiable_range_raw(prefix, range) 100 | } 101 | 102 | fn nonverifiable_get_raw(&self, key: &[u8]) -> Self::GetRawFut { 103 | (**self).nonverifiable_get_raw(key) 104 | } 105 | 106 | fn object_get(&self, key: &'static str) -> Option { 107 | (**self).object_get(key) 108 | } 109 | 110 | fn object_type(&self, key: &'static str) -> Option { 111 | (**self).object_type(key) 112 | } 113 | } 114 | 115 | impl<'a, S: StateRead + Send + Sync> StateRead for &'a mut S { 116 | type GetRawFut = S::GetRawFut; 117 | 
type PrefixRawStream = S::PrefixRawStream; 118 | type PrefixKeysStream = S::PrefixKeysStream; 119 | type NonconsensusPrefixRawStream = S::NonconsensusPrefixRawStream; 120 | type NonconsensusRangeRawStream = S::NonconsensusRangeRawStream; 121 | 122 | fn get_raw(&self, key: &str) -> Self::GetRawFut { 123 | (**self).get_raw(key) 124 | } 125 | 126 | fn prefix_raw(&self, prefix: &str) -> S::PrefixRawStream { 127 | (**self).prefix_raw(prefix) 128 | } 129 | 130 | fn prefix_keys(&self, prefix: &str) -> S::PrefixKeysStream { 131 | (**self).prefix_keys(prefix) 132 | } 133 | 134 | fn nonverifiable_prefix_raw(&self, prefix: &[u8]) -> S::NonconsensusPrefixRawStream { 135 | (**self).nonverifiable_prefix_raw(prefix) 136 | } 137 | 138 | fn nonverifiable_range_raw( 139 | &self, 140 | prefix: Option<&[u8]>, 141 | range: impl RangeBounds>, 142 | ) -> Result { 143 | (**self).nonverifiable_range_raw(prefix, range) 144 | } 145 | 146 | fn nonverifiable_get_raw(&self, key: &[u8]) -> Self::GetRawFut { 147 | (**self).nonverifiable_get_raw(key) 148 | } 149 | 150 | fn object_get(&self, key: &'static str) -> Option { 151 | (**self).object_get(key) 152 | } 153 | 154 | fn object_type(&self, key: &'static str) -> Option { 155 | (**self).object_type(key) 156 | } 157 | } 158 | 159 | impl StateRead for Arc { 160 | type GetRawFut = S::GetRawFut; 161 | type PrefixRawStream = S::PrefixRawStream; 162 | type PrefixKeysStream = S::PrefixKeysStream; 163 | type NonconsensusPrefixRawStream = S::NonconsensusPrefixRawStream; 164 | type NonconsensusRangeRawStream = S::NonconsensusRangeRawStream; 165 | 166 | fn get_raw(&self, key: &str) -> Self::GetRawFut { 167 | (**self).get_raw(key) 168 | } 169 | 170 | fn prefix_raw(&self, prefix: &str) -> S::PrefixRawStream { 171 | (**self).prefix_raw(prefix) 172 | } 173 | 174 | fn prefix_keys(&self, prefix: &str) -> S::PrefixKeysStream { 175 | (**self).prefix_keys(prefix) 176 | } 177 | 178 | fn nonverifiable_prefix_raw(&self, prefix: &[u8]) -> S::NonconsensusPrefixRawStream 
// The unit type acts as an always-empty state: every read returns `None` or
// an empty stream. Useful as a base case or placeholder where a `StateRead`
// is required but no backing storage exists.
impl StateRead for () {
    // Futures/streams are the eagerly-ready / pre-exhausted stdlib-adapter
    // types, so no async machinery is needed.
    type GetRawFut = futures::future::Ready>>>;
    type PrefixRawStream = futures::stream::Iter)>>>;
    type PrefixKeysStream = futures::stream::Iter>>;
    type NonconsensusPrefixRawStream =
        futures::stream::Iter, Vec)>>>;
    type NonconsensusRangeRawStream =
        futures::stream::Iter, Vec)>>>;

    // Single-key reads always succeed with "not present".
    fn get_raw(&self, _key: &str) -> Self::GetRawFut {
        futures::future::ready(Ok(None))
    }

    fn nonverifiable_get_raw(&self, _key: &[u8]) -> Self::GetRawFut {
        futures::future::ready(Ok(None))
    }

    // The ephemeral object store is likewise always empty.
    fn object_get(&self, _key: &'static str) -> Option {
        None
    }

    fn object_type(&self, _key: &'static str) -> Option {
        None
    }

    // All scans yield an immediately-exhausted stream.
    fn prefix_raw(&self, _prefix: &str) -> Self::PrefixRawStream {
        futures::stream::iter(std::iter::empty())
    }

    fn prefix_keys(&self, _prefix: &str) -> Self::PrefixKeysStream {
        futures::stream::iter(std::iter::empty())
    }

    fn nonverifiable_prefix_raw(&self, _prefix: &[u8]) -> Self::NonconsensusPrefixRawStream {
        futures::stream::iter(std::iter::empty())
    }

    // Range queries never fail here: there is nothing to validate against.
    fn nonverifiable_range_raw(
        &self,
        _prefix: Option<&[u8]>,
        _range: impl RangeBounds>,
    ) -> Result {
        Ok(futures::stream::iter(std::iter::empty()))
    }
}
-------------------------------------------------------------------------------- /src/rpc.rs: -------------------------------------------------------------------------------- 1 | pub struct Server { 2 | storage: Storage, 3 | } 4 | 5 | impl Server { 6 | pub fn new(storage: Storage) -> Self { 7 | Self { storage } 8 | } 9 | } 10 | use std::pin::Pin; 11 | 12 | use crate::proto::v1::{ 13 | key_value_response::Value as JMTValue, non_verifiable_key_value_response::Value as NVValue, 14 | query_service_server::QueryService, watch_response as wr, KeyValueRequest, KeyValueResponse, 15 | NonVerifiableKeyValueRequest, NonVerifiableKeyValueResponse, PrefixValueRequest, 16 | PrefixValueResponse, WatchRequest, WatchResponse, 17 | }; 18 | use crate::read::StateRead; 19 | use futures::{StreamExt, TryStreamExt}; 20 | use regex::Regex; 21 | use tokio_stream::wrappers::ReceiverStream; 22 | use tonic::Status; 23 | use tracing::instrument; 24 | 25 | use crate::Storage; 26 | 27 | #[tonic::async_trait] 28 | impl QueryService for Server { 29 | #[instrument(skip(self, request))] 30 | async fn non_verifiable_key_value( 31 | &self, 32 | request: tonic::Request, 33 | ) -> Result, Status> { 34 | let state = self.storage.latest_snapshot(); 35 | let request = request.into_inner(); 36 | 37 | if request.key.is_none() || request.key.as_ref().expect("key is Some").inner.is_empty() { 38 | return Err(Status::invalid_argument("key is empty")); 39 | } 40 | 41 | let key = request.key.expect("key is Some").inner; 42 | let some_value = state 43 | .nonverifiable_get_raw(&key) 44 | .await 45 | .map_err(|e| tonic::Status::internal(e.to_string()))?; 46 | 47 | Ok(tonic::Response::new(NonVerifiableKeyValueResponse { 48 | value: some_value.map(|value| NVValue { value }), 49 | })) 50 | } 51 | 52 | #[instrument(skip(self, request))] 53 | async fn key_value( 54 | &self, 55 | request: tonic::Request, 56 | ) -> Result, Status> { 57 | let state = self.storage.latest_snapshot(); 58 | // We map the error here to avoid 
including `tonic` as a dependency 59 | // in the `chain` crate, to support its compilation to wasm. 60 | let request = request.into_inner(); 61 | tracing::debug!(?request, "processing key_value request"); 62 | 63 | if request.key.is_empty() { 64 | return Err(Status::invalid_argument("key is empty")); 65 | } 66 | 67 | let (some_value, proof) = { 68 | // Don't generate the proof if the request doesn't ask for it. 69 | let (v, p) = if request.proof { 70 | let (v, p) = state 71 | .get_with_proof(request.key.into_bytes()) 72 | .await 73 | .map_err(|e| tonic::Status::internal(e.to_string()))?; 74 | (v, Some(p)) 75 | } else { 76 | ( 77 | state 78 | .get_raw(&request.key) 79 | .await 80 | .map_err(|e| tonic::Status::internal(e.to_string()))?, 81 | None, 82 | ) 83 | }; 84 | (v, p) 85 | }; 86 | 87 | Ok(tonic::Response::new(KeyValueResponse { 88 | value: some_value.map(|value| JMTValue { value }), 89 | proof: if request.proof { 90 | Some(ibc_proto::ibc::core::commitment::v1::MerkleProof { 91 | proofs: proof 92 | .expect("proof should be present") 93 | .proofs 94 | .into_iter() 95 | .map(|p| { 96 | let mut encoded = Vec::new(); 97 | prost::Message::encode(&p, &mut encoded).expect("able to encode proof"); 98 | prost::Message::decode(&*encoded).expect("able to decode proof") 99 | }) 100 | .collect(), 101 | }) 102 | } else { 103 | None 104 | }, 105 | })) 106 | } 107 | 108 | type PrefixValueStream = 109 | Pin> + Send>>; 110 | 111 | #[instrument(skip(self, request))] 112 | async fn prefix_value( 113 | &self, 114 | request: tonic::Request, 115 | ) -> Result, Status> { 116 | let state = self.storage.latest_snapshot(); 117 | let request = request.into_inner(); 118 | tracing::debug!(?request); 119 | 120 | if request.prefix.is_empty() { 121 | return Err(Status::invalid_argument("prefix is empty")); 122 | } 123 | 124 | Ok(tonic::Response::new( 125 | state 126 | .prefix_raw(&request.prefix) 127 | .map_ok(|i: (String, Vec)| { 128 | let (key, value) = i; 129 | PrefixValueResponse { key, 
value } 130 | }) 131 | .map_err(|e: anyhow::Error| { 132 | tonic::Status::unavailable(format!( 133 | "error getting prefix value from storage: {e}" 134 | )) 135 | }) 136 | .boxed(), 137 | )) 138 | } 139 | 140 | type WatchStream = ReceiverStream>; 141 | 142 | #[instrument(skip(self, request))] 143 | async fn watch( 144 | &self, 145 | request: tonic::Request, 146 | ) -> Result, Status> { 147 | let request = request.into_inner(); 148 | tracing::debug!(?request); 149 | 150 | const MAX_REGEX_LEN: usize = 1024; 151 | 152 | let key_regex = match request.key_regex.as_str() { 153 | "" => None, 154 | _ => Some( 155 | regex::RegexBuilder::new(&request.key_regex) 156 | .size_limit(MAX_REGEX_LEN) 157 | .build() 158 | .map_err(|e| Status::invalid_argument(format!("invalid key_regex: {}", e)))?, 159 | ), 160 | }; 161 | 162 | // Use the `bytes` regex to allow matching byte strings. 163 | let nv_key_regex = match request.nv_key_regex.as_str() { 164 | "" => None, 165 | _ => Some( 166 | regex::bytes::RegexBuilder::new(&request.nv_key_regex) 167 | .size_limit(MAX_REGEX_LEN) 168 | .unicode(false) 169 | .build() 170 | .map_err(|e| { 171 | Status::invalid_argument(format!("invalid nv_key_regex: {}", e)) 172 | })?, 173 | ), 174 | }; 175 | 176 | let (tx, rx) = tokio::sync::mpsc::channel::>(10); 177 | 178 | tokio::spawn(watch_changes( 179 | self.storage.clone(), 180 | key_regex, 181 | nv_key_regex, 182 | tx, 183 | )); 184 | 185 | Ok(tonic::Response::new(ReceiverStream::new(rx))) 186 | } 187 | } 188 | 189 | async fn watch_changes( 190 | storage: Storage, 191 | key_regex: Option, 192 | nv_key_regex: Option, 193 | tx: tokio::sync::mpsc::Sender>, 194 | ) -> anyhow::Result<()> { 195 | let mut changes_rx = storage.subscribe_changes(); 196 | while !tx.is_closed() { 197 | // Wait for a new set of changes, reporting an error if we don't get one. 
198 | if let Err(e) = changes_rx.changed().await { 199 | tx.send(Err(tonic::Status::internal(e.to_string()))).await?; 200 | } 201 | let (version, changes) = changes_rx.borrow_and_update().clone(); 202 | 203 | if key_regex.is_some() || nv_key_regex.is_none() { 204 | for (key, value) in changes.unwritten_changes().iter() { 205 | if key_regex 206 | .as_ref() 207 | .unwrap_or(&Regex::new(r"").expect("empty regex ok")) 208 | .is_match(key) 209 | { 210 | tx.send(Ok(WatchResponse { 211 | version, 212 | entry: Some(wr::Entry::Kv(wr::KeyValue { 213 | key: key.clone(), 214 | value: value.as_ref().cloned().unwrap_or_default(), 215 | deleted: value.is_none(), 216 | })), 217 | })) 218 | .await?; 219 | } 220 | } 221 | } 222 | 223 | if nv_key_regex.is_some() || key_regex.is_none() { 224 | for (key, value) in changes.nonverifiable_changes().iter() { 225 | if nv_key_regex 226 | .as_ref() 227 | .unwrap_or(®ex::bytes::Regex::new(r"").expect("empty regex ok")) 228 | .is_match(key) 229 | { 230 | tx.send(Ok(WatchResponse { 231 | version, 232 | entry: Some(wr::Entry::NvKv(wr::NvKeyValue { 233 | key: key.clone(), 234 | value: value.as_ref().cloned().unwrap_or_default(), 235 | deleted: value.is_none(), 236 | })), 237 | })) 238 | .await?; 239 | } 240 | } 241 | } 242 | } 243 | return Ok(()); 244 | } 245 | -------------------------------------------------------------------------------- /src/snapshot.rs: -------------------------------------------------------------------------------- 1 | use std::iter; 2 | use std::{any::Any, sync::Arc}; 3 | 4 | use anyhow::Result; 5 | use async_trait::async_trait; 6 | use ibc_types::core::commitment::MerkleProof; 7 | use tokio::sync::mpsc; 8 | use tracing::Span; 9 | 10 | #[cfg(feature = "metrics")] 11 | use crate::metrics; 12 | use crate::store::multistore::{self, MultistoreCache}; 13 | use crate::{store, StateRead}; 14 | 15 | mod rocks_wrapper; 16 | 17 | pub(crate) use rocks_wrapper::RocksDbSnapshot; 18 | 19 | /// A snapshot of the underlying storage at a 
specific state version, suitable 20 | /// for read-only access by multiple threads, e.g., RPC calls. 21 | /// 22 | /// Snapshots are cheap to create and clone. Internally, they're implemented as 23 | /// a wrapper around a [RocksDB snapshot](https://github.com/facebook/rocksdb/wiki/Snapshot) 24 | /// with a pinned JMT version number for the snapshot. 25 | #[derive(Clone)] 26 | pub struct Snapshot(pub(crate) Arc); 27 | 28 | // We don't want to expose the `TreeReader` implementation outside of this crate. 29 | #[derive(Debug)] 30 | pub(crate) struct Inner { 31 | /// Tracks the latest version of each substore, and routes keys to the correct substore. 32 | pub(crate) multistore_cache: MultistoreCache, 33 | /// A handle to the underlying RocksDB snapshot. 34 | pub(crate) snapshot: Arc, 35 | /// The version of the main JMT tree. 36 | pub(crate) version: jmt::Version, 37 | // Used to retrieve column family handles. 38 | pub(crate) db: Arc, 39 | } 40 | 41 | impl Snapshot { 42 | /// Creates a new `Snapshot` with the given version and substore configs. 43 | pub(crate) fn new( 44 | db: Arc, 45 | version: jmt::Version, 46 | multistore_cache: multistore::MultistoreCache, 47 | ) -> Self { 48 | Self(Arc::new(Inner { 49 | snapshot: Arc::new(RocksDbSnapshot::new(db.clone())), 50 | version, 51 | db, 52 | multistore_cache, 53 | })) 54 | } 55 | 56 | pub fn version(&self) -> jmt::Version { 57 | self.0.version 58 | } 59 | 60 | /// Returns some value corresponding to the key, along with an ICS23 existence proof 61 | /// up to the current JMT root hash. If the key is not present, returns `None` and a 62 | /// non-existence proof. 
63 | pub async fn get_with_proof(&self, key: Vec) -> Result<(Option>, MerkleProof)> { 64 | if key.is_empty() { 65 | anyhow::bail!("empty keys are not allowed") 66 | } 67 | 68 | let span = tracing::Span::current(); 69 | let rocksdb_snapshot = self.0.snapshot.clone(); 70 | let db = self.0.db.clone(); 71 | let mut proofs = vec![]; 72 | 73 | let (substore_key, substore_config) = self.0.multistore_cache.config.route_key_bytes(&key); 74 | let substore_key_bytes = substore_key.to_vec(); 75 | let substore_version = self.substore_version(&substore_config).unwrap_or(u64::MAX); 76 | let key_to_substore_root = substore_config.prefix.clone(); 77 | 78 | let substore = store::substore::SubstoreSnapshot { 79 | config: substore_config, 80 | rocksdb_snapshot: rocksdb_snapshot.clone(), 81 | version: substore_version, 82 | db: db.clone(), 83 | }; 84 | 85 | let (substore_value, substore_commitment_proof) = tokio::task::spawn_blocking({ 86 | let span = span.clone(); 87 | move || span.in_scope(|| substore.get_with_proof(substore_key_bytes)) 88 | }) 89 | .await??; 90 | 91 | proofs.push(substore_commitment_proof); 92 | 93 | // in the case where we request a proof for a key that is in a substore, also get a proof from the root to the substore key. 
94 | if !key_to_substore_root.is_empty() { 95 | let main_store_config = self.0.multistore_cache.config.main_store.clone(); 96 | let main_version = self 97 | .substore_version(&main_store_config) 98 | .unwrap_or(u64::MAX); 99 | let mainstore = store::substore::SubstoreSnapshot { 100 | config: main_store_config, 101 | rocksdb_snapshot, 102 | version: main_version, 103 | db, 104 | }; 105 | 106 | let (_, main_commitment_proof) = tokio::task::spawn_blocking({ 107 | let span = span.clone(); 108 | move || span.in_scope(|| mainstore.get_with_proof(key_to_substore_root.into())) 109 | }) 110 | .await??; 111 | 112 | proofs.push(main_commitment_proof); 113 | } 114 | 115 | Ok(( 116 | substore_value, 117 | MerkleProof { 118 | proofs: proofs.clone(), 119 | }, 120 | )) 121 | } 122 | 123 | pub fn prefix_version(&self, prefix: &str) -> Result> { 124 | let Some(config) = self 125 | .0 126 | .multistore_cache 127 | .config 128 | .find_substore(prefix.as_bytes()) 129 | else { 130 | anyhow::bail!("rquested a version for a prefix that does not exist (prefix={prefix})") 131 | }; 132 | 133 | Ok(self.substore_version(&config)) 134 | } 135 | 136 | /// Returns the root hash of the subtree corresponding to the given prefix. 137 | /// If the prefix is empty, the root hash of the main tree is returned. 138 | /// 139 | /// # Errors 140 | /// Returns an error if the supplied prefix does not correspond to a known substore. 
    pub async fn prefix_root_hash(&self, prefix: &str) -> Result {
        // Capture the current tracing span so the blocking task below logs
        // under the caller's context.
        let span = tracing::Span::current();
        // Clone the Arc'd handles so they can be moved into the blocking task.
        let rocksdb_snapshot = self.0.snapshot.clone();
        let db = self.0.db.clone();

        // Route the prefix to its substore; an unknown prefix is a caller error.
        let Some(config) = self
            .0
            .multistore_cache
            .config
            .find_substore(prefix.as_bytes())
        else {
            anyhow::bail!("requested a root for a substore that does not exist (prefix={prefix})")
        };

        let version = self
            .substore_version(&config)
            .expect("the substore exists and has been initialized");

        let substore = store::substore::SubstoreSnapshot {
            config,
            rocksdb_snapshot,
            version,
            db,
        };

        tracing::debug!(
            prefix = substore.config.prefix,
            version = substore.version,
            "fetching root hash for substore"
        );

        // Root-hash computation reads RocksDB synchronously, so run it on the
        // blocking thread pool rather than the async executor.
        tokio::task::spawn_blocking(move || span.in_scope(|| substore.root_hash())).await?
    }
199 | fn get_raw(&self, key: &str) -> Self::GetRawFut { 200 | let span = Span::current(); 201 | let (key, config) = self.0.multistore_cache.config.route_key_str(key); 202 | 203 | let rocksdb_snapshot = self.0.snapshot.clone(); 204 | let db = self.0.db.clone(); 205 | 206 | let version = self 207 | .substore_version(&config) 208 | .expect("the substore exists and has been initialized"); 209 | 210 | let substore = store::substore::SubstoreSnapshot { 211 | config, 212 | rocksdb_snapshot, 213 | version, 214 | db, 215 | }; 216 | let key_hash = jmt::KeyHash::with::(key); 217 | 218 | crate::future::SnapshotFuture(tokio::task::spawn_blocking(move || { 219 | span.in_scope(|| { 220 | let _start = std::time::Instant::now(); 221 | let rsp = substore.get_jmt(key_hash); 222 | #[cfg(feature = "metrics")] 223 | metrics::histogram!(metrics::STORAGE_GET_RAW_DURATION).record(_start.elapsed()); 224 | rsp 225 | }) 226 | })) 227 | } 228 | 229 | /// Fetch a key from nonverifiable storage. 230 | fn nonverifiable_get_raw(&self, key: &[u8]) -> Self::GetRawFut { 231 | let span = Span::current(); 232 | let (key, config) = self.0.multistore_cache.config.route_key_bytes(key); 233 | 234 | let rocksdb_snapshot = self.0.snapshot.clone(); 235 | let db = self.0.db.clone(); 236 | 237 | let version = self 238 | .substore_version(&config) 239 | .expect("the substore exists and has been initialized"); 240 | 241 | let substore = store::substore::SubstoreSnapshot { 242 | config, 243 | rocksdb_snapshot, 244 | version, 245 | db, 246 | }; 247 | let key: Vec = key.to_vec(); 248 | 249 | crate::future::SnapshotFuture(tokio::task::spawn_blocking(move || { 250 | span.in_scope(|| { 251 | let _start = std::time::Instant::now(); 252 | 253 | let cf_nonverifiable = substore.config.cf_nonverifiable(&substore.db); 254 | let rsp = substore 255 | .rocksdb_snapshot 256 | .get_cf(cf_nonverifiable, key) 257 | .map_err(Into::into); 258 | #[cfg(feature = "metrics")] 259 | 
metrics::histogram!(metrics::STORAGE_NONCONSENSUS_GET_RAW_DURATION) 260 | .record(_start.elapsed()); 261 | rsp 262 | }) 263 | })) 264 | } 265 | 266 | /// Returns a stream of all key-value pairs with the given prefix. 267 | fn prefix_raw(&self, prefix: &str) -> Self::PrefixRawStream { 268 | let span = Span::current(); 269 | 270 | let rocksdb_snapshot = self.0.snapshot.clone(); 271 | let db = self.0.db.clone(); 272 | 273 | let (prefix_truncated, config) = self.0.multistore_cache.config.match_prefix_str(prefix); 274 | tracing::trace!(substore_key = prefix_truncated, substore_prefix = config.prefix, prefix_supplied = ?prefix, "matched prefix, fetching substore"); 275 | let substore_prefix = config.prefix.clone(); 276 | 277 | let version = self 278 | .substore_version(&config) 279 | .expect("the substore exists and has been initialized"); 280 | 281 | let substore = store::substore::SubstoreSnapshot { 282 | config, 283 | rocksdb_snapshot, 284 | version, 285 | db, 286 | }; 287 | 288 | let mut options = rocksdb::ReadOptions::default(); 289 | options.set_iterate_range(rocksdb::PrefixRange(prefix_truncated.as_bytes())); 290 | let mode = rocksdb::IteratorMode::Start; 291 | let (tx_prefix_item, rx_prefix_query) = mpsc::channel(10); 292 | 293 | // Since the JMT keys are hashed, we can't use a prefix iterator directly. 294 | // We need to first prefix range the key preimages column family, then use the hashed matches to fetch the values 295 | // from the JMT column family. 296 | tokio::task::spawn_blocking(move || { 297 | span.in_scope(|| { 298 | let cf_jmt_keys = substore.config.cf_jmt_keys(&substore.db); 299 | let jmt_keys_iterator = 300 | substore 301 | .rocksdb_snapshot 302 | .iterator_cf_opt(cf_jmt_keys, options, mode); 303 | 304 | for tuple in jmt_keys_iterator { 305 | // For each key that matches the prefix, fetch the value from the JMT column family. 
306 | let (key_preimage, _) = tuple?; 307 | let substore_key = std::str::from_utf8(key_preimage.as_ref()) 308 | .expect("saved jmt keys are utf-8 strings"); 309 | let key_hash = jmt::KeyHash::with::(substore_key.as_bytes()); 310 | 311 | let full_key = if substore_prefix.is_empty() { 312 | substore_key.to_string() 313 | } else { 314 | format!("{substore_prefix}/{substore_key}").to_string() 315 | }; 316 | 317 | let v = substore 318 | .get_jmt(key_hash)? 319 | .expect("keys in jmt_keys should have a corresponding value in jmt"); 320 | 321 | tx_prefix_item.blocking_send(Ok((full_key, v)))?; 322 | } 323 | anyhow::Ok(()) 324 | }) 325 | }); 326 | 327 | tokio_stream::wrappers::ReceiverStream::new(rx_prefix_query) 328 | } 329 | 330 | // NOTE: this implementation is almost the same as the above, but without 331 | // fetching the values. not totally clear if this could be combined, or if that would 332 | // be better overall. 333 | fn prefix_keys(&self, prefix: &str) -> Self::PrefixKeysStream { 334 | let span = Span::current(); 335 | 336 | let rocksdb_snapshot = self.0.snapshot.clone(); 337 | let db = self.0.db.clone(); 338 | 339 | let (prefix_truncated, config) = self.0.multistore_cache.config.match_prefix_str(prefix); 340 | 341 | let version = self 342 | .substore_version(&config) 343 | .expect("the substore exists and has been initialized"); 344 | 345 | let substore = store::substore::SubstoreSnapshot { 346 | config, 347 | rocksdb_snapshot, 348 | version, 349 | db, 350 | }; 351 | 352 | let mut options = rocksdb::ReadOptions::default(); 353 | options.set_iterate_range(rocksdb::PrefixRange(prefix_truncated.as_bytes())); 354 | let mode = rocksdb::IteratorMode::Start; 355 | let (tx_prefix_keys, rx_prefix_keys) = mpsc::channel(10); 356 | 357 | tokio::task::spawn_blocking(move || { 358 | span.in_scope(|| { 359 | let cf_jmt_keys = substore.config.cf_jmt_keys(&substore.db); 360 | let iter = substore 361 | .rocksdb_snapshot 362 | .iterator_cf_opt(cf_jmt_keys, options, mode); 363 | 
364 | let substore_prefix = &substore.config.prefix; 365 | 366 | for key_and_keyhash in iter { 367 | let (raw_preimage, _) = key_and_keyhash?; 368 | let preimage = std::str::from_utf8(raw_preimage.as_ref()) 369 | .expect("saved jmt keys are utf-8 strings"); 370 | 371 | let full_key = if substore_prefix.is_empty() { 372 | preimage.to_string() 373 | } else { 374 | format!("{substore_prefix}/{preimage}").to_string() 375 | }; 376 | 377 | tx_prefix_keys.blocking_send(Ok(full_key))?; 378 | } 379 | anyhow::Ok(()) 380 | }) 381 | }); 382 | 383 | tokio_stream::wrappers::ReceiverStream::new(rx_prefix_keys) 384 | } 385 | 386 | /// Returns a stream of all key-value pairs with the given prefix, from nonverifiable storage. 387 | fn nonverifiable_prefix_raw(&self, prefix: &[u8]) -> Self::NonconsensusPrefixRawStream { 388 | let span = Span::current(); 389 | let rocksdb_snapshot = self.0.snapshot.clone(); 390 | let db = self.0.db.clone(); 391 | 392 | let (truncated_prefix, config) = self.0.multistore_cache.config.match_prefix_bytes(prefix); 393 | tracing::trace!(substore_key = ?truncated_prefix, substore_prefix = config.prefix, prefix_supplied = ?prefix, "matched prefix, fetching substore"); 394 | let version = self 395 | .substore_version(&config) 396 | .expect("the substore exists and has been initialized"); 397 | 398 | let substore = store::substore::SubstoreSnapshot { 399 | config, 400 | rocksdb_snapshot, 401 | version, 402 | db, 403 | }; 404 | 405 | let mut options = rocksdb::ReadOptions::default(); 406 | options.set_iterate_range(rocksdb::PrefixRange(truncated_prefix)); 407 | let mode = rocksdb::IteratorMode::Start; 408 | 409 | let (tx_prefix_query, rx_prefix_query) = mpsc::channel(10); 410 | 411 | tokio::task::spawn_blocking(move || { 412 | span.in_scope(|| { 413 | let cf_nonverifiable = substore.config.cf_nonverifiable(&substore.db); 414 | let iter = 415 | substore 416 | .rocksdb_snapshot 417 | .iterator_cf_opt(cf_nonverifiable, options, mode); 418 | let substore_prefix = 
substore.config.prefix.as_bytes().to_vec(); 419 | for i in iter { 420 | let (boxed_key, boxed_value) = i?; 421 | let key: Vec = boxed_key.into(); 422 | let value: Vec = boxed_value.into(); 423 | 424 | // Costly to do on every iteration, but this should be dwarfed by the 425 | // context switch to the tokio runtime. 426 | let mut full_key: Vec = vec![]; 427 | if substore_prefix.is_empty() { 428 | full_key.extend(key); 429 | } else { 430 | full_key.extend(substore_prefix.clone()); 431 | full_key.extend(iter::once(b'/')); 432 | full_key.extend(key); 433 | } 434 | 435 | tx_prefix_query.blocking_send(Ok((full_key, value)))?; 436 | } 437 | anyhow::Ok(()) 438 | }) 439 | }); 440 | 441 | tokio_stream::wrappers::ReceiverStream::new(rx_prefix_query) 442 | } 443 | 444 | /// Returns a stream of all key-value pairs with the given prefix, and range 445 | /// from nonverifiable storage. 446 | /// **Important**: Only supports range queries over the main store. 447 | fn nonverifiable_range_raw( 448 | &self, 449 | prefix: Option<&[u8]>, 450 | range: impl std::ops::RangeBounds>, 451 | ) -> anyhow::Result { 452 | let span = Span::current(); 453 | let rocksdb_snapshot = self.0.snapshot.clone(); 454 | let db = self.0.db.clone(); 455 | 456 | let (prefix, config) = self 457 | .0 458 | .multistore_cache 459 | .config 460 | .route_key_bytes(prefix.unwrap_or_default()); 461 | 462 | let version = self 463 | .substore_version(&config) 464 | .expect("the substore exists and has been initialized"); 465 | 466 | let substore = store::substore::SubstoreSnapshot { 467 | config, 468 | rocksdb_snapshot, 469 | version, 470 | db, 471 | }; 472 | 473 | let (_range, (start, end)) = crate::utils::convert_bounds(range)?; 474 | let mut options = rocksdb::ReadOptions::default(); 475 | 476 | let (start, end) = (start.unwrap_or_default(), end.unwrap_or_default()); 477 | let end_is_empty = end.is_empty(); 478 | 479 | let mut prefix_start = Vec::with_capacity(prefix.len() + start.len()); 480 | let mut prefix_end = 
Vec::with_capacity(prefix.len() + end.len()); 481 | 482 | prefix_start.extend(prefix); 483 | prefix_start.extend(start); 484 | prefix_end.extend(prefix); 485 | prefix_end.extend(end); 486 | 487 | tracing::debug!( 488 | ?prefix_start, 489 | ?prefix_end, 490 | ?prefix, 491 | "nonverifiable_range_raw" 492 | ); 493 | 494 | options.set_iterate_lower_bound(prefix_start); 495 | 496 | // Our range queries implementation relies on forward iteration, which 497 | // means that if the upper key is unbounded and a prefix has been set 498 | // we cannot set the upper bound to the prefix. This is because the 499 | // prefix is used as a lower bound for the iterator, and the upper bound 500 | // is used to stop the iteration. 501 | // If we set the upper bound to the prefix, we would get a range consisting of: 502 | // ``` 503 | // "compactblock/001" to "compactblock/" 504 | // ``` 505 | // which would not return anything. 506 | if !end_is_empty { 507 | options.set_iterate_upper_bound(prefix_end); 508 | } 509 | 510 | let mode = rocksdb::IteratorMode::Start; 511 | let prefix = prefix.to_vec(); 512 | 513 | let (tx, rx) = mpsc::channel::, Vec)>>(10); 514 | tokio::task::spawn_blocking(move || { 515 | span.in_scope(|| { 516 | let cf_nonverifiable = substore.config.cf_nonverifiable(&substore.db); 517 | let iter = 518 | substore 519 | .rocksdb_snapshot 520 | .iterator_cf_opt(cf_nonverifiable, options, mode); 521 | 522 | for i in iter { 523 | let (key, value) = i?; 524 | 525 | // This is a bit of a hack, but RocksDB doesn't let us express the "prefixed range-queries", 526 | // that we want to support. In particular, we want to be able to do a prefix query that starts 527 | // at a particular key, and does not have an upper bound. Since we can't create an iterator that 528 | // cover this range, we have to filter out the keys that don't match the prefix. 
529 | if !prefix.is_empty() && !key.starts_with(&prefix) { 530 | break; 531 | } 532 | tx.blocking_send(Ok((key.into(), value.into())))?; 533 | } 534 | Ok::<(), anyhow::Error>(()) 535 | }) 536 | }); 537 | 538 | Ok(tokio_stream::wrappers::ReceiverStream::new(rx)) 539 | } 540 | 541 | fn object_get(&self, _key: &str) -> Option { 542 | // No-op -- this will never be called internally, and `Snapshot` is not exposed in public API 543 | None 544 | } 545 | 546 | fn object_type(&self, _key: &str) -> Option { 547 | // No-op -- this will never be called internally, and `Snapshot` is not exposed in public API 548 | None 549 | } 550 | } 551 | 552 | impl std::fmt::Debug for Snapshot { 553 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 554 | f.debug_struct("Snapshot") 555 | .field("version", &self.0.version) 556 | .finish_non_exhaustive() 557 | } 558 | } 559 | -------------------------------------------------------------------------------- /src/snapshot/rocks_wrapper.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Formatter}; 2 | use std::ops::Deref; 3 | use std::sync::Arc; 4 | 5 | /// A wrapper type that acts as a `rocksdb::Snapshot` of an `Arc`'d database 6 | /// handle. 7 | /// 8 | /// This works around a limitation of the `rocksdb` API: the `rocksdb::Snapshot` 9 | /// can only take a borrowed database handle, not an `Arc`'d one, so the 10 | /// lifetime of the `rocksdb::Snapshot` is bound to the lifetime of the borrowed 11 | /// handle. Instead, this wrapper type bundles an `Arc`'d handle together with 12 | /// the `rocksdb::Snapshot`, so that the database is guaranteed to live at least 13 | /// as long as any snapshot of it. 14 | pub struct RocksDbSnapshot { 15 | /// The snapshot itself. It's not really `'static`, so it's on us to ensure 16 | /// that the database stays live as long as the snapshot does. 
17 | inner: rocksdb::Snapshot<'static>, 18 | /// The raw pointer form of the Arc we use to guarantee the database 19 | /// lives at least as long as the snapshot. We create this from the Arc 20 | /// in the constructor, pass it to the snapshot on creation, and then 21 | /// convert it back into an Arc in the drop impl to decrement the refcount. 22 | /// 23 | /// Arc::into_raw consumes the Arc instance but does not decrement the 24 | /// refcount. This means that we cannot accidentally drop the Arc while 25 | /// using the raw pointer. Instead, we must explicitly convert the raw 26 | /// pointer back into an Arc when we're finished using it, and only then 27 | /// drop it. 28 | raw_db: *const rocksdb::DB, 29 | } 30 | 31 | impl Debug for RocksDbSnapshot { 32 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 33 | f.debug_struct("RocksDbSnapshot").finish() 34 | } 35 | } 36 | 37 | // Safety requires that the inner snapshot instance must never live longer than 38 | // the wrapper. We're assured that this is the case, because we only return a 39 | // borrow of the inner snapshot, and because `rocksdb::Snapshot` is neither 40 | // `Copy` nor `Clone`. 41 | // 42 | // We're also reasonably certain that the upstream crate will not add such an 43 | // implementation in the future, because its drop impl is used to make the FFI 44 | // call that discards the in-memory snapshot, so it would not be safe to add 45 | // such an implementation. 46 | impl Deref for RocksDbSnapshot { 47 | type Target = rocksdb::Snapshot<'static>; 48 | 49 | fn deref(&self) -> &Self::Target { 50 | &self.inner 51 | } 52 | } 53 | 54 | impl RocksDbSnapshot { 55 | /// Creates a new snapshot of the given `db`. 56 | pub fn new(db: Arc) -> Self { 57 | // First, convert the Arc into a raw pointer. 58 | let raw_db = Arc::into_raw(db); 59 | // Next, use the raw pointer to construct a &DB instance with a fake 60 | // 'static lifetime, and use that instance to construct the inner 61 | // Snapshot. 
62 | let static_db: &'static rocksdb::DB = unsafe { &*raw_db }; 63 | let inner = rocksdb::Snapshot::new(static_db); 64 | 65 | Self { inner, raw_db } 66 | } 67 | } 68 | 69 | impl Drop for RocksDbSnapshot { 70 | fn drop(&mut self) { 71 | // Now that we know we're finished with the `Snapshot`, we can 72 | // reconstruct the `Arc` and drop it, to decrement the DB refcount. 73 | unsafe { 74 | let db = Arc::from_raw(self.raw_db); 75 | std::mem::drop(db); 76 | } 77 | } 78 | } 79 | 80 | /// The `Send` implementation is safe because the `rocksdb::Snapshot` is `Send`. 81 | unsafe impl Send for RocksDbSnapshot {} 82 | /// The `Sync` implementation is safe because the `rocksdb::Snapshot` is `Sync`. 83 | unsafe impl Sync for RocksDbSnapshot {} 84 | -------------------------------------------------------------------------------- /src/snapshot_cache.rs: -------------------------------------------------------------------------------- 1 | use crate::Snapshot; 2 | use std::{cmp, collections::VecDeque}; 3 | 4 | /// A circular cache for storing [`Snapshot`]s. 5 | /// 6 | /// # Usage 7 | /// 8 | /// [`Snapshot`]s are inserted in the cache using the [`push`] or [`try_push`] 9 | /// methods. If the cache is full, the oldest entry will be evicted to make space 10 | /// for the newer entry. 11 | /// 12 | /// # Constraints 13 | /// 14 | /// [`Snapshot`]s must be inserted sequentially relative to their [`jmt::Version`] 15 | /// numbers, and have consecutive version numbers. 16 | pub struct SnapshotCache { 17 | /// A sequence of increasingly recent [`Snapshot`]s. 18 | cache: VecDeque, 19 | /// The max length and capacity of the cache. 20 | max_size: usize, 21 | } 22 | 23 | impl SnapshotCache { 24 | /// Creates a [`SnapshotCache`] with `max_size` capacity, and inserts an initial `Snapshot` in 25 | /// it. If the specified capacity is zero, the cache will default to having size 1. 
26 | pub fn new(initial: Snapshot, max_size: usize) -> Self { 27 | let max_size = cmp::max(max_size, 1); 28 | let mut cache = VecDeque::with_capacity(max_size); 29 | cache.push_front(initial); 30 | 31 | Self { cache, max_size } 32 | } 33 | 34 | /// Attempts to insert a [`Snapshot`] entry into the cache. If the cache is full, the oldest 35 | /// entry will be evicted to make space. 36 | /// 37 | /// [`Snapshot`]s must be inserted sequentially relative to their `jmt::Version`s and have 38 | /// consecutive version numbers. 39 | /// 40 | /// ## Errors 41 | /// 42 | /// The method will return an error if the supplied `snapshot` has a version number that is: 43 | /// 44 | /// - stale i.e. older than the latest snapshot 45 | /// 46 | /// - skipping a version i.e. the difference between their version numbers is greater than 1 47 | pub fn try_push(&mut self, snapshot: Snapshot) -> anyhow::Result<()> { 48 | let latest_version = self.latest().version(); 49 | if latest_version.wrapping_add(1) != snapshot.version() { 50 | anyhow::bail!("snapshot_cache: trying to insert stale snapshots."); 51 | } 52 | 53 | if self.cache.len() >= self.max_size { 54 | self.cache.pop_back(); 55 | } 56 | 57 | self.cache.push_front(snapshot); 58 | Ok(()) 59 | } 60 | 61 | /// Returns the latest inserted `Snapshot`. 62 | pub fn latest(&self) -> Snapshot { 63 | self.cache 64 | .front() 65 | .map(Clone::clone) 66 | .expect("snapshot_cache cannot be empty") 67 | } 68 | 69 | /// Attempts to fetch a [`Snapshot`] with a matching `jmt::Version`, and returns `None` if none 70 | /// was found. 71 | pub fn get(&self, version: jmt::Version) -> Option { 72 | let latest_version = self.latest().version(); 73 | // We compute the offset assuming that snapshot entries are cached 74 | // such that the delta between entries is always 1. 
75 | let offset = latest_version.wrapping_sub(version) as usize; 76 | self.cache 77 | .get(offset) 78 | .map(Clone::clone) 79 | .filter(|s| s.version() == version) 80 | } 81 | 82 | /// Empties the cache. 83 | pub fn clear(&mut self) { 84 | self.cache.clear(); 85 | } 86 | } 87 | 88 | #[cfg(test)] 89 | mod test { 90 | 91 | use crate::snapshot::Snapshot; 92 | use crate::snapshot_cache::SnapshotCache; 93 | use crate::storage::Storage; 94 | use crate::store::multistore::MultistoreCache; 95 | 96 | async fn create_storage_instance() -> Storage { 97 | use tempfile::tempdir; 98 | // create a storage backend for testing 99 | let dir = tempdir().expect("unable to create tempdir"); 100 | let file_path = dir.path().join("snapshot-cache-testing.db"); 101 | 102 | Storage::load(file_path, vec![]) 103 | .await 104 | .expect("unable to load storage") 105 | } 106 | 107 | #[tokio::test] 108 | /// `SnapshotCache` constructed with zero capacity instead defaults to one. 109 | async fn fail_zero_capacity() { 110 | let storage = create_storage_instance().await; 111 | let db = storage.db(); 112 | let snapshot = storage.latest_snapshot(); 113 | let mut cache = SnapshotCache::new(snapshot, 0); 114 | 115 | // Check that the cache has a capacity at least 1 116 | assert!(cache.get(u64::MAX).is_some()); 117 | let new_snapshot = Snapshot::new(db, 0, MultistoreCache::default()); 118 | cache 119 | .try_push(new_snapshot) 120 | .expect("should not fail to insert a new entry"); 121 | 122 | // Check that the cache has a capacity of exactly 1 123 | assert!(cache.get(u64::MAX).is_none()); 124 | assert!(cache.get(0).is_some()); 125 | } 126 | 127 | #[tokio::test] 128 | /// Fails to insert snapshot entries that are older than the latest' 129 | async fn fail_insert_stale_snapshot() { 130 | let storage = create_storage_instance().await; 131 | let db_handle = storage.db(); 132 | let snapshot = storage.latest_snapshot(); 133 | let mut cache = SnapshotCache::new(snapshot, 1); 134 | let stale_snapshot = 
Snapshot::new(db_handle, 1, MultistoreCache::default()); 135 | cache 136 | .try_push(stale_snapshot) 137 | .expect_err("should fail to insert a stale entry in the snapshot cache"); 138 | } 139 | 140 | #[tokio::test] 141 | /// Fails to insert snapshot entries that have a version gap. 142 | async fn fail_insert_gapped_snapshot() { 143 | let storage = create_storage_instance().await; 144 | let db_handle = storage.db(); 145 | let snapshot = Snapshot::new(db_handle.clone(), 0, MultistoreCache::default()); 146 | let mut cache = SnapshotCache::new(snapshot, 2); 147 | let snapshot = Snapshot::new(db_handle, 2, MultistoreCache::default()); 148 | cache 149 | .try_push(snapshot) 150 | .expect_err("should fail to insert snapshot with skipped version number"); 151 | } 152 | 153 | #[tokio::test] 154 | /// Checks that we handle pre-genesis `jmt::Version` correctly. 155 | async fn cache_manage_pre_genesis() { 156 | let storage = create_storage_instance().await; 157 | let db_handle = storage.db(); 158 | let snapshot = storage.latest_snapshot(); 159 | 160 | // Create a cache of size 10, populated with one entry with version: u64::MAX 161 | let mut cache = SnapshotCache::new(snapshot, 10); 162 | 163 | // Fill the entire cache by inserting 9 more entries. 164 | for i in 0..9 { 165 | let snapshot = Snapshot::new(db_handle.clone(), i, MultistoreCache::default()); 166 | cache 167 | .try_push(snapshot) 168 | .expect("should not fail to insert a new entry"); 169 | } 170 | 171 | // The cache is full, check that the oldest entry is still in the cache. 172 | assert!(cache.get(u64::MAX).is_some()); 173 | 174 | // Push another snapshot in the cache, this should cause eviction of the oldest entry 175 | // alone. 176 | let new_snapshot = Snapshot::new(db_handle, 9, MultistoreCache::default()); 177 | cache 178 | .try_push(new_snapshot) 179 | .expect("should not fail to insert a new entry"); 180 | 181 | // Check that the pre-genesis entry has been evicted! 
182 | assert!(cache.get(u64::MAX).is_none()); 183 | 184 | // Check that all the other entries are still in the cache. 185 | for i in 0..10 { 186 | assert!(cache.get(i).is_some()); 187 | } 188 | } 189 | 190 | #[tokio::test] 191 | /// Checks that inserting on a full cache exclusively evicts the oldest snapshots. 192 | async fn drop_oldest_snapshot() { 193 | let storage = create_storage_instance().await; 194 | let db_handle = storage.db(); 195 | let snapshot = Snapshot::new(db_handle.clone(), 0, MultistoreCache::default()); 196 | 197 | // Create a cache of size 10, populated with a snapshot at version 0. 198 | let mut cache = SnapshotCache::new(snapshot, 10); 199 | 200 | // Saturate the cache by inserting 9 more entries. 201 | for i in 1..10 { 202 | let snapshot = Snapshot::new(db_handle.clone(), i, MultistoreCache::default()); 203 | cache 204 | .try_push(snapshot) 205 | .expect("should be able to insert new entries") 206 | } 207 | 208 | // Check that the oldest value is still present: 209 | assert!(cache.get(0).is_some()); 210 | 211 | // Insert a new value that should overflow the cache. 212 | let snapshot = Snapshot::new(db_handle, 10, MultistoreCache::default()); 213 | cache 214 | .try_push(snapshot) 215 | .expect("should be able to insert a new entry"); 216 | 217 | // Check that the oldest value has been dropped. 218 | assert!(cache.get(0).is_none()); 219 | 220 | // Check that the front of the cache is the latest inserted snapshot. 221 | assert_eq!(cache.latest().version(), 10); 222 | 223 | // Check that all the other snapshots are still present in the cache. 
224 | for i in 1..11 { 225 | assert!(cache.get(i).is_some()); 226 | } 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /src/storage.rs: -------------------------------------------------------------------------------- 1 | use std::{path::PathBuf, sync::Arc}; 2 | 3 | use anyhow::{bail, ensure, Result}; 4 | use parking_lot::RwLock; 5 | use rocksdb::{Options, DB}; 6 | use std::collections::HashMap; 7 | use tokio::sync::watch; 8 | use tracing::Span; 9 | 10 | use crate::{ 11 | cache::Cache, 12 | snapshot::Snapshot, 13 | store::{ 14 | multistore::{self, MultistoreConfig}, 15 | substore::{SubstoreConfig, SubstoreSnapshot, SubstoreStorage}, 16 | }, 17 | }; 18 | use crate::{snapshot_cache::SnapshotCache, StagedWriteBatch, StateDelta}; 19 | 20 | mod temp; 21 | pub use temp::TempStorage; 22 | 23 | /// A handle for a storage instance, backed by RocksDB. 24 | /// 25 | /// The handle is cheaply clonable; all clones share the same backing data store. 26 | #[derive(Clone)] 27 | pub struct Storage(Arc); 28 | 29 | impl std::fmt::Debug for Storage { 30 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 31 | f.debug_struct("Storage").finish_non_exhaustive() 32 | } 33 | } 34 | 35 | // A private inner element to prevent the `TreeWriter` implementation 36 | // from leaking outside of this crate. 37 | struct Inner { 38 | dispatcher_tx: watch::Sender<(Snapshot, (jmt::Version, Arc))>, 39 | snapshot_rx: watch::Receiver, 40 | changes_rx: watch::Receiver<(jmt::Version, Arc)>, 41 | snapshots: RwLock, 42 | multistore_config: MultistoreConfig, 43 | /// A handle to the dispatcher task. 44 | /// This is used by `Storage::release` to wait for the task to terminate. 45 | jh_dispatcher: Option>, 46 | db: Arc, 47 | } 48 | 49 | impl Storage { 50 | /// Loads a storage instance from the given path, initializing it if necessary. 
51 | pub async fn load(path: PathBuf, default_prefixes: Vec) -> Result { 52 | let span = Span::current(); 53 | let db_path = path.clone(); 54 | // initializing main storage instance. 55 | let prefixes = tokio::task::spawn_blocking(move || { 56 | span.in_scope(|| { 57 | let mut opts = Options::default(); 58 | opts.create_if_missing(true); 59 | opts.create_missing_column_families(true); 60 | tracing::info!(?path, "opening rocksdb config column"); 61 | 62 | // Hack(erwan): RocksDB requires us to specify all the column families 63 | // that we want to use upfront. This is problematic when we are initializing 64 | // a new database, because the call to `DBCommon::list_cf` will fail 65 | // if the database manifest is not found. To work around this, we ignore 66 | // the error and assume that the database is empty. 67 | // Tracked in: https://github.com/rust-rocksdb/rust-rocksdb/issues/608 68 | let mut columns = DB::list_cf(&opts, path.clone()).unwrap_or_default(); 69 | if columns.is_empty() { 70 | columns.push("config".to_string()); 71 | } 72 | 73 | let db = DB::open_cf(&opts, path, columns).expect("can open database"); 74 | let cf_config = db 75 | .cf_handle("config") 76 | .expect("config column family is created if missing"); 77 | let config_iter = db.iterator_cf(cf_config, rocksdb::IteratorMode::Start); 78 | let mut prefixes = Vec::new(); 79 | tracing::info!("reading prefixes from config column family"); 80 | for i in config_iter { 81 | let (key, _) = i.expect("can read from iterator"); 82 | prefixes.push(String::from_utf8(key.to_vec()).expect("prefix is utf8")); 83 | } 84 | 85 | for prefix in default_prefixes { 86 | if !prefixes.contains(&prefix) { 87 | db.put_cf(cf_config, prefix.as_bytes(), b"") 88 | .expect("can write to db"); 89 | prefixes.push(prefix); 90 | } 91 | } 92 | 93 | std::mem::drop(db); 94 | prefixes 95 | }) 96 | }) 97 | .await?; 98 | 99 | Storage::init(db_path, prefixes).await 100 | } 101 | 102 | /// Initializes a new storage instance at the given 
path. Takes a list of default prefixes 103 | /// to initialize the storage configuration with. 104 | /// Here is a high-level overview of the initialization process: 105 | /// 1. Create a new RocksDB instance at the given path. 106 | /// 2. Read the prefix list and create a [`SubstoreConfig`] for each prefix. 107 | /// 3. Create a new [`MultistoreConfig`] from supplied prefixes. 108 | /// 4. Initialize the substore cache with the latest version of each substore. 109 | /// 5. Spawn a dispatcher task that forwards new snapshots to subscribers. 110 | pub async fn init(path: PathBuf, prefixes: Vec) -> Result { 111 | let span = Span::current(); 112 | 113 | tokio::task 114 | ::spawn_blocking(move || { 115 | span.in_scope(|| { 116 | let mut substore_configs = Vec::new(); 117 | tracing::info!("initializing global store config"); 118 | let main_store = Arc::new(SubstoreConfig::new("")); 119 | for substore_prefix in prefixes { 120 | tracing::info!(prefix = ?substore_prefix, "creating substore config for prefix"); 121 | if substore_prefix.is_empty() { 122 | bail!("the empty prefix is reserved") 123 | } 124 | substore_configs.push(Arc::new(SubstoreConfig::new(substore_prefix))); 125 | } 126 | 127 | let multistore_config = MultistoreConfig { 128 | main_store: main_store.clone(), 129 | substores: substore_configs.clone(), 130 | }; 131 | 132 | let mut substore_columns: Vec<&String> = substore_configs 133 | .iter() 134 | .flat_map(|config| config.columns()) 135 | .collect(); 136 | let mut columns: Vec<&String> = main_store.columns().collect(); 137 | columns.append(&mut substore_columns); 138 | 139 | tracing::info!(?path, "opening rocksdb"); 140 | let cf_config_string = "config".to_string(); 141 | // RocksDB setup: define options, collect all the columns, and open the database. 142 | // Each substore defines a prefix and its own set of columns. 143 | // See [`crate::store::SubstoreConfig`] for more details. 
144 | let mut opts = Options::default(); 145 | opts.create_if_missing(true); 146 | opts.create_missing_column_families(true); 147 | columns.push(&cf_config_string); 148 | 149 | let db = DB::open_cf(&opts, path, columns)?; 150 | let shared_db = Arc::new(db); 151 | 152 | // Initialize the substore cache with the latest version of each substore. 153 | // Note: for compatibility reasons with Tendermint/CometBFT, we set the "pre-genesis" 154 | // jmt version to be u64::MAX, corresponding to -1 mod 2^64. 155 | let jmt_version = main_store 156 | .latest_version_from_db(&shared_db)? 157 | .unwrap_or(u64::MAX); 158 | 159 | let mut multistore_cache = 160 | multistore::MultistoreCache::from_config(multistore_config.clone()); 161 | 162 | for substore_config in substore_configs { 163 | let substore_version = substore_config 164 | .latest_version_from_db(&shared_db)? 165 | .unwrap_or(u64::MAX); 166 | 167 | multistore_cache.set_version(substore_config.clone(), substore_version); 168 | tracing::debug!( 169 | substore_prefix = ?substore_config.prefix, 170 | ?substore_version, 171 | "initializing substore" 172 | ); 173 | } 174 | 175 | multistore_cache.set_version(main_store, jmt_version); 176 | tracing::debug!(?jmt_version, "initializing main store"); 177 | 178 | let latest_snapshot = 179 | Snapshot::new(shared_db.clone(), jmt_version, multistore_cache); 180 | 181 | // A concurrent-safe ring buffer of the latest 10 snapshots. 182 | let snapshots = RwLock::new(SnapshotCache::new(latest_snapshot.clone(), 10)); 183 | 184 | // Setup a dispatcher task that acts as an intermediary between the storage 185 | // and the rest of the system. Its purpose is to forward new snapshots to 186 | // subscribers. 187 | // 188 | // If we were to send snapshots directly to subscribers, a slow subscriber could 189 | // hold a lock on the watch channel for too long, and block the consensus-critical 190 | // commit logic, which needs to acquire a write lock on the watch channel. 
191 | // 192 | // Instead, we "proxy" through a dispatcher task that copies values from one 193 | // channel to the other, ensuring that if an API consumer misuses the watch 194 | // channels, it will only affect other subscribers, not the commit logic. 195 | 196 | let (snapshot_tx, snapshot_rx) = watch::channel(latest_snapshot.clone()); 197 | // Note: this will never be seen by consumers, since we mark the current value as seen 198 | // before returning the receiver. 199 | let dummy_cache = (u64::MAX, Arc::new(Cache::default())); 200 | let (changes_tx, changes_rx) = watch::channel(dummy_cache.clone()); 201 | let (tx_dispatcher, mut rx_dispatcher) = watch::channel((latest_snapshot, dummy_cache)); 202 | 203 | let jh_dispatcher = tokio::spawn(async move { 204 | tracing::info!("snapshot dispatcher task has started"); 205 | // If the sender is dropped, the task will terminate. 206 | while rx_dispatcher.changed().await.is_ok() { 207 | tracing::debug!("dispatcher has received a new snapshot"); 208 | let (snapshot, changes) = rx_dispatcher.borrow_and_update().clone(); 209 | // [`watch::Sender::send`] only returns an error if there are no 210 | // receivers, so we can safely ignore the result here. 211 | let _ = snapshot_tx.send(snapshot); 212 | let _ = changes_tx.send(changes); 213 | } 214 | tracing::info!("dispatcher task has terminated") 215 | }); 216 | 217 | Ok(Self(Arc::new(Inner { 218 | // We don't need to wrap the task in a `CancelOnDrop` because 219 | // the task will stop when the sender is dropped. However, certain 220 | // test scenarios require us to wait that all resources are released. 221 | jh_dispatcher: Some(jh_dispatcher), 222 | dispatcher_tx: tx_dispatcher, 223 | snapshot_rx, 224 | changes_rx, 225 | multistore_config, 226 | snapshots, 227 | db: shared_db, 228 | }))) 229 | }) 230 | }) 231 | .await? 232 | } 233 | 234 | /// Returns the latest version (block height) of the tree recorded by the 235 | /// `Storage`. 
236 | /// 237 | /// If the tree is empty and has not been initialized, returns `u64::MAX`. 238 | pub fn latest_version(&self) -> jmt::Version { 239 | self.latest_snapshot().version() 240 | } 241 | 242 | /// Returns a [`watch::Receiver`] that can be used to subscribe to new state versions. 243 | pub fn subscribe(&self) -> watch::Receiver { 244 | let mut rx = self.0.snapshot_rx.clone(); 245 | // Mark the current value as seen, so that the user of the receiver 246 | // will only be notified of *subsequent* values. 247 | rx.borrow_and_update(); 248 | rx 249 | } 250 | 251 | /// Returns a [`watch::Receiver`] that can be used to subscribe to state changes. 252 | pub fn subscribe_changes(&self) -> watch::Receiver<(jmt::Version, Arc)> { 253 | let mut rx = self.0.changes_rx.clone(); 254 | // Mark the current value as seen, so that the user of the receiver 255 | // will only be notified of *subsequent* values. 256 | rx.borrow_and_update(); 257 | rx 258 | } 259 | 260 | /// Returns a new [`Snapshot`] on top of the latest version of the tree. 261 | pub fn latest_snapshot(&self) -> Snapshot { 262 | self.0.snapshots.read().latest() 263 | } 264 | 265 | /// Fetches the [`Snapshot`] corresponding to the supplied `jmt::Version` from 266 | /// the [`SnapshotCache`]. Returns `None` if no match was found. 267 | pub fn snapshot(&self, version: jmt::Version) -> Option { 268 | self.0.snapshots.read().get(version) 269 | } 270 | 271 | /// Prepares a commit for the provided [`StateDelta`], returning a [`StagedWriteBatch`]. 272 | /// The batch can be committed to the database using the [`Storage::commit_batch`] method. 273 | pub async fn prepare_commit(&self, delta: StateDelta) -> Result { 274 | // Extract the snapshot and the changes from the state delta 275 | let (snapshot, changes) = delta.flatten(); 276 | let prev_snapshot_version = snapshot.version(); 277 | 278 | // We use wrapping_add here so that we can write `new_version = 0` by 279 | // overflowing `PRE_GENESIS_VERSION`. 
280 | let prev_storage_version = self.latest_version(); 281 | let next_storage_version = prev_storage_version.wrapping_add(1); 282 | tracing::debug!(prev_storage_version, next_storage_version); 283 | 284 | ensure!( 285 | prev_storage_version == prev_snapshot_version, 286 | "trying to prepare a commit for a delta forked from version {}, but the latest version is {}", 287 | prev_snapshot_version, 288 | prev_storage_version 289 | ); 290 | 291 | self.prepare_commit_inner(snapshot, changes, next_storage_version, false) 292 | .await 293 | } 294 | 295 | async fn prepare_commit_inner( 296 | &self, 297 | snapshot: Snapshot, 298 | cache: Cache, 299 | version: jmt::Version, 300 | perform_migration: bool, 301 | ) -> Result { 302 | tracing::debug!(new_jmt_version = ?version, "preparing to commit state delta"); 303 | // Save a copy of the changes to send to subscribers later. 304 | let changes = Arc::new(cache.clone_changes()); 305 | 306 | let mut changes_by_substore = cache.shard_by_prefix(&self.0.multistore_config); 307 | #[allow(clippy::disallowed_types)] 308 | let mut substore_roots = HashMap::new(); 309 | let mut multistore_versions = 310 | multistore::MultistoreCache::from_config(self.0.multistore_config.clone()); 311 | 312 | let db = self.0.db.clone(); 313 | let rocksdb_snapshot = snapshot.0.snapshot.clone(); 314 | 315 | let mut new_versions = vec![]; 316 | 317 | // We use a single write batch to commit all the substores at once. Each task will append 318 | // its own changes to the batch, and we will commit it at the end. 319 | let mut write_batch = rocksdb::WriteBatch::default(); 320 | 321 | // Note(erwan): Here, we spawn a commit task for each substore. 322 | // The substore keyspaces are disjoint, so conceptually it is 323 | // fine to rewrite it using a [`tokio::task::JoinSet`]. 324 | // The reason this isn't done is because `rocksdb::WriteBatch` 325 | // is _not_ thread-safe. 
326 | // 327 | // This means that to spin-up N tasks, we would need to use a 328 | // single batch wrapped in a mutex, or use N batches, and find 329 | // a way to commit to them atomically. This isn't supported by 330 | // RocksDB which leaves one option: to iterate over each entry 331 | // in each batch, and merge them together. At this point, this 332 | // is probably not worth it. 333 | // 334 | // Another option is to trade atomicity for parallelism by producing 335 | // N batches, and committing them in distinct atomic writes. This is 336 | // potentially faster, but it is also more dangerous, because if one 337 | // of the writes fails, we are left with a partially committed state. 338 | // 339 | // The current implementation leans on the fact that the number of 340 | // substores is small, and that the synchronization overhead of a joinset 341 | // would exceed its benefits. This works well for now. 342 | for config in self.0.multistore_config.iter() { 343 | tracing::debug!(substore_prefix = ?config.prefix, "processing substore"); 344 | // If the substore is empty, we need to fetch its initialized version from the cache. 345 | let old_substore_version = config 346 | .latest_version_from_snapshot(&db, &rocksdb_snapshot)? 
347 | .unwrap_or_else(|| { 348 | tracing::debug!("substore is empty, fetching initialized version from cache"); 349 | snapshot 350 | .substore_version(config) 351 | .expect("prefix should be initialized") 352 | }); 353 | 354 | let Some(changeset) = changes_by_substore.remove(config) else { 355 | tracing::debug!(prefix = config.prefix, "no changes for substore, skipping"); 356 | multistore_versions.set_version(config.clone(), old_substore_version); 357 | continue; 358 | }; 359 | 360 | let new_version = if perform_migration { 361 | old_substore_version 362 | } else { 363 | old_substore_version.wrapping_add(1) 364 | }; 365 | new_versions.push(new_version); 366 | let substore_snapshot = SubstoreSnapshot { 367 | config: config.clone(), 368 | rocksdb_snapshot: rocksdb_snapshot.clone(), 369 | version: new_version, 370 | db: db.clone(), 371 | }; 372 | 373 | let substore_storage = SubstoreStorage { substore_snapshot }; 374 | 375 | // Commit the substore and collect its root hash 376 | let (root_hash, substore_batch) = substore_storage 377 | .commit(changeset, write_batch, new_version, perform_migration) 378 | .await?; 379 | write_batch = substore_batch; 380 | 381 | tracing::debug!( 382 | ?root_hash, 383 | prefix = config.prefix, 384 | ?version, 385 | "added substore to write batch" 386 | ); 387 | substore_roots.insert(config.clone(), (root_hash, new_version)); 388 | 389 | tracing::debug!( 390 | ?root_hash, 391 | prefix = ?config.prefix, 392 | ?new_version, 393 | "updating substore version" 394 | ); 395 | multistore_versions.set_version(config.clone(), new_version); 396 | } 397 | 398 | // Add substore roots to the main store changeset 399 | let main_store_config = self.0.multistore_config.main_store.clone(); 400 | let mut main_store_changes = changes_by_substore 401 | .remove(&main_store_config) 402 | .unwrap_or_else(|| { 403 | tracing::debug!("no changes for main store, creating empty changeset"); 404 | Cache::default() 405 | }); 406 | 407 | for (config, (root_hash, _)) in 
substore_roots.iter() { 408 | main_store_changes 409 | .unwritten_changes 410 | .insert(config.prefix.to_string(), Some(root_hash.0.to_vec())); 411 | } 412 | 413 | // Commit the main store and collect the global root hash 414 | let main_store_snapshot = SubstoreSnapshot { 415 | config: main_store_config.clone(), 416 | rocksdb_snapshot: snapshot.0.snapshot.clone(), 417 | version, 418 | db: self.0.db.clone(), 419 | }; 420 | 421 | let main_store_storage = SubstoreStorage { 422 | substore_snapshot: main_store_snapshot, 423 | }; 424 | 425 | let (global_root_hash, write_batch) = main_store_storage 426 | .commit(main_store_changes, write_batch, version, perform_migration) 427 | .await?; 428 | tracing::debug!( 429 | ?global_root_hash, 430 | ?version, 431 | "added main store to write batch" 432 | ); 433 | 434 | tracing::debug!(?global_root_hash, version = ?version, "updating main store version"); 435 | let main_store_config = self.0.multistore_config.main_store.clone(); 436 | multistore_versions.set_version(main_store_config, version); 437 | 438 | Ok(StagedWriteBatch { 439 | write_batch, 440 | version, 441 | multistore_versions, 442 | root_hash: global_root_hash, 443 | substore_roots, 444 | perform_migration, 445 | changes, 446 | }) 447 | } 448 | 449 | /// Commits the provided [`StateDelta`] to persistent storage as the latest 450 | /// version of the chain state. 451 | pub async fn commit(&self, delta: StateDelta) -> Result { 452 | let batch = self.prepare_commit(delta).await?; 453 | self.commit_batch(batch) 454 | } 455 | 456 | /// Commits the supplied [`StagedWriteBatch`] to persistent storage. 457 | /// 458 | /// # Migrations 459 | /// In the case of chain state migrations we need to commit the new state 460 | /// without incrementing the version. If `perform_migration` is `true` the 461 | /// snapshot will _not_ be written to the snapshot cache, and no subscribers 462 | /// will be notified. Substore versions will not be updated. 
463 | pub fn commit_batch(&self, batch: StagedWriteBatch) -> Result { 464 | let StagedWriteBatch { 465 | write_batch, 466 | version, 467 | multistore_versions, 468 | root_hash: global_root_hash, 469 | substore_roots, 470 | perform_migration, 471 | changes, 472 | } = batch; 473 | 474 | let db = self.0.db.clone(); 475 | 476 | // check that the version of the batch being committed is the correct next version 477 | let old_version = self.latest_version(); 478 | let expected_new_version = if perform_migration { 479 | old_version 480 | } else { 481 | old_version.wrapping_add(1) 482 | }; 483 | 484 | ensure!( 485 | expected_new_version == version, 486 | "new version mismatch: expected {} but got {}", 487 | expected_new_version, 488 | version 489 | ); 490 | 491 | // also check that each of the substore versions are the correct next version 492 | let snapshot = self.latest_snapshot(); 493 | 494 | // Warning: we MUST check version coherence for **every** substore. 495 | // These checks are a second line of defense. They must consider 496 | // the case when two deltas effect distinct substores. 497 | // 498 | // version: (m, ss_1, ss_2) 499 | // D_0: (_, 1, 0) <- initial state 500 | // D_1: (A, 1, 1) <- multiwrite to ss_1 AND ss_2 501 | // D_1*: (A, 1, 0) <- isolate write to ss_1 502 | // 503 | // A comprehensive check lets us catch the stale write D_1* even if 504 | // locally it does not directly effect the second substore at all. 505 | // And even if the main version check passes (spuriously, or because of 506 | // a migration). 507 | for (substore_config, new_version) in &multistore_versions.substores { 508 | if substore_config.prefix.is_empty() { 509 | // this is the main store, ignore 510 | continue; 511 | } 512 | 513 | let old_substore_version = snapshot 514 | .substore_version(substore_config) 515 | .expect("substores must be initialized at startup"); 516 | 517 | // if the substore exists in `substore_roots`, there have been updates to the substore. 
518 | // if `perform_migration` is false and there are updates, the next version should be previous + 1. 519 | // otherwise, the version should remain the same. 520 | let expected_substore_version = 521 | if substore_roots.get(substore_config).is_some() && !perform_migration { 522 | old_substore_version.wrapping_add(1) 523 | } else { 524 | old_substore_version 525 | }; 526 | 527 | ensure!( 528 | expected_substore_version == *new_version, 529 | "substore new version mismatch for substore with prefix {}: expected {} but got {}", 530 | substore_config.prefix, 531 | expected_substore_version, 532 | new_version 533 | ); 534 | } 535 | 536 | tracing::debug!(new_jmt_version = ?batch.version, "committing batch to db"); 537 | 538 | db.write(write_batch).expect("can write to db"); 539 | tracing::debug!( 540 | ?global_root_hash, 541 | ?version, 542 | "committed main store and substores to db" 543 | ); 544 | 545 | // If we're not performing a migration, we should update the snapshot cache 546 | if !perform_migration { 547 | tracing::debug!("updating snapshot cache"); 548 | 549 | let latest_snapshot = Snapshot::new(db.clone(), version, multistore_versions); 550 | // Obtain a write lock to the snapshot cache, and push the latest snapshot 551 | // available. The lock guard is implicitly dropped immediately. 552 | self.0 553 | .snapshots 554 | .write() 555 | .try_push(latest_snapshot.clone()) 556 | .expect("should process snapshots with consecutive jmt versions"); 557 | 558 | tracing::debug!(?version, "dispatching snapshot"); 559 | 560 | // Send fails if the channel is closed (i.e., if there are no receivers); 561 | // in this case, we should ignore the error, we have no one to notify. 
562 | let _ = self 563 | .0 564 | .dispatcher_tx 565 | .send((latest_snapshot, (version, changes))); 566 | } else { 567 | tracing::debug!("skipping snapshot cache update"); 568 | } 569 | 570 | Ok(global_root_hash) 571 | } 572 | 573 | #[cfg(feature = "migration")] 574 | /// Commit the provided [`StateDelta`] to persistent storage without increasing the version 575 | /// of the chain state, and skips the snapshot cache update. 576 | pub async fn commit_in_place(&self, delta: StateDelta) -> Result { 577 | let (snapshot, changes) = delta.flatten(); 578 | let old_version = self.latest_version(); 579 | let batch = self 580 | .prepare_commit_inner(snapshot, changes, old_version, true) 581 | .await?; 582 | self.commit_batch(batch) 583 | } 584 | 585 | /// Returns the internal handle to RocksDB, this is useful to test adjacent storage crates. 586 | #[cfg(test)] 587 | pub(crate) fn db(&self) -> Arc { 588 | self.0.db.clone() 589 | } 590 | 591 | /// Shuts down the database and the dispatcher task, and waits for all resources to be reclaimed. 592 | /// Panics if there are still outstanding references to the `Inner` storage. 593 | pub async fn release(mut self) { 594 | if let Some(inner) = Arc::get_mut(&mut self.0) { 595 | inner.shutdown().await; 596 | inner.snapshots.write().clear(); 597 | // `Inner` is dropped once the call completes. 598 | } else { 599 | panic!("Unable to get mutable reference to Inner"); 600 | } 601 | } 602 | } 603 | 604 | impl Inner { 605 | pub(crate) async fn shutdown(&mut self) { 606 | if let Some(jh) = self.jh_dispatcher.take() { 607 | jh.abort(); 608 | let _ = jh.await; 609 | } 610 | } 611 | } 612 | -------------------------------------------------------------------------------- /src/storage/temp.rs: -------------------------------------------------------------------------------- 1 | use crate::Storage; 2 | use std::ops::Deref; 3 | use tempfile::TempDir; 4 | 5 | /// A [`Storage`] instance backed by a [`tempfile::TempDir`] for testing. 
6 | /// 7 | /// The `TempDir` handle is bundled into the `TempStorage`, so the temporary 8 | /// directory is cleaned up when the `TempStorage` instance is dropped. 9 | pub struct TempStorage { 10 | inner: Storage, 11 | _dir: TempDir, 12 | } 13 | 14 | impl Deref for TempStorage { 15 | type Target = Storage; 16 | fn deref(&self) -> &Self::Target { 17 | &self.inner 18 | } 19 | } 20 | 21 | impl AsRef for TempStorage { 22 | fn as_ref(&self) -> &Storage { 23 | &self.inner 24 | } 25 | } 26 | 27 | impl TempStorage { 28 | pub async fn new() -> anyhow::Result { 29 | let dir = tempfile::tempdir()?; 30 | let db_filepath = dir.path().join("storage.db"); 31 | let inner = Storage::load(db_filepath.clone(), vec![]).await?; 32 | 33 | Ok(TempStorage { inner, _dir: dir }) 34 | } 35 | 36 | pub async fn new_with_prefixes(prefixes: Vec) -> anyhow::Result { 37 | let dir = tempfile::tempdir()?; 38 | let db_filepath = dir.path().join("storage.db"); 39 | let inner = Storage::load(db_filepath.clone(), prefixes).await?; 40 | 41 | Ok(TempStorage { inner, _dir: dir }) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/store.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod multistore; 2 | pub(crate) mod substore; 3 | -------------------------------------------------------------------------------- /src/store/multistore.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Display, sync::Arc}; 2 | 3 | use super::substore::SubstoreConfig; 4 | 5 | /// A collection of substore, each with a unique prefix. 6 | #[derive(Debug, Clone)] 7 | pub struct MultistoreConfig { 8 | pub main_store: Arc, 9 | pub substores: Vec>, 10 | } 11 | 12 | impl MultistoreConfig { 13 | pub fn iter(&self) -> impl Iterator> { 14 | self.substores.iter() 15 | } 16 | 17 | /// Returns the substore matching the key's prefix, return `None` otherwise. 
18 | pub fn find_substore(&self, key: &[u8]) -> Option> { 19 | if key.is_empty() { 20 | return Some(self.main_store.clone()); 21 | } 22 | 23 | // Note: This is a linear search, but the number of substores is small. 24 | self.substores 25 | .iter() 26 | .find(|s| key.starts_with(s.prefix.as_bytes())) 27 | .cloned() 28 | } 29 | 30 | /// Route a key to a substore, and return the truncated key and the corresponding `SubstoreConfig`. 31 | /// 32 | /// This method is used for ordinary key-value operations. 33 | /// 34 | /// Note: since this method implements the routing logic for the multistore, 35 | /// callers might prefer [`MultistoreConfig::match_prefix_str`] if they don't 36 | /// need to route the key. 37 | /// 38 | /// # Routing 39 | /// + If the key is a total match for the prefix, the **main store** is returned. 40 | /// + If the key is not a total match for the prefix, the prefix is removed from 41 | /// the key and the key is routed to the substore matching the prefix. 42 | /// + If the key does not match any prefix, the key is routed to the **main store**. 43 | /// + If a delimiter is prefixing the key, it is removed. 44 | /// 45 | /// # Examples 46 | /// `prefix_a/key` -> `key` in `substore_a` 47 | /// `prefix_akey` -> `prefix_akey` in `main_store 48 | /// `prefix_a` -> `prefix_a` in `main_store` 49 | /// `prefix_a/` -> `prefix_a/` in `main_store 50 | /// `nonexistent_prefix` -> `nonexistent_prefix` in `main_store` 51 | pub fn route_key_str<'a>(&self, key: &'a str) -> (&'a str, Arc) { 52 | let config = self 53 | .find_substore(key.as_bytes()) 54 | .unwrap_or_else(|| self.main_store.clone()); 55 | 56 | // If the key is a total match, we want to return the key bound to the 57 | // main store. This is where the root hash of the prefix tree is located. 
58 | if key == config.prefix { 59 | return (key, self.main_store.clone()); 60 | } 61 | 62 | let truncated_key = key 63 | .strip_prefix(&config.prefix) 64 | .expect("key has the prefix of the matched substore"); 65 | 66 | // If the key does not contain a delimiter, we return the original key 67 | // routed to the main store. This is because we do not want to allow 68 | // collisions e.g. `prefix_a/key` and `prefix_akey`. 69 | let Some(matching_key) = truncated_key.strip_prefix('/') else { 70 | return (key, self.main_store.clone()); 71 | }; 72 | 73 | // If the matching key is empty, we return the original key routed to 74 | // the main store. This is because we do not want to allow empty keys 75 | // in the substore. 76 | if matching_key.is_empty() { 77 | (key, self.main_store.clone()) 78 | } else { 79 | (matching_key, config) 80 | } 81 | } 82 | 83 | /// Route a key to a substore, and return the truncated key and the corresponding `SubstoreConfig`. 84 | /// 85 | /// This method is used for ordinary key-value operations. 86 | /// 87 | /// Note: since this method implements the routing logic for the multistore, 88 | /// callers might prefer [`MultistoreConfig::match_prefix_bytes`] if they don't 89 | /// need to route the key. 90 | /// 91 | /// # Routing 92 | /// + If the key is a total match for the prefix, the **main store** is returned. 93 | /// + If the key is not a total match for the prefix, the prefix is removed from 94 | /// the key and the key is routed to the substore matching the prefix. 95 | /// + If the key does not match any prefix, the key is routed to the **main store**. 96 | /// + If a delimiter is prefixing the key, it is removed. 
97 | /// 98 | /// # Examples 99 | /// `prefix_a/key` -> `key` in `substore_a` 100 | /// `prefix_a` -> `prefix_a` in `main_store` 101 | /// `prefix_a/` -> `prefix_a/` in `main_store` 102 | /// `nonexistent_prefix` -> `nonexistent_prefix` in `main_store` 103 | pub fn route_key_bytes<'a>(&self, key: &'a [u8]) -> (&'a [u8], Arc) { 104 | let config = self 105 | .find_substore(key) 106 | .unwrap_or_else(|| self.main_store.clone()); 107 | 108 | // If the key is a total match for the prefix, we return the original key 109 | // routed to the main store. This is where subtree root hashes are stored. 110 | if key == config.prefix.as_bytes() { 111 | return (key, self.main_store.clone()); 112 | } 113 | 114 | let truncated_key = key 115 | .strip_prefix(config.prefix.as_bytes()) 116 | .expect("key has the prefix of the matched substore"); 117 | 118 | // If the key does not contain a delimiter, we return the original key 119 | // routed to the main store. This is because we do not want to allow 120 | // collisions e.g. `prefix_a/key` and `prefix_akey`. 121 | let Some(matching_key) = truncated_key.strip_prefix(b"/") else { 122 | return (key, self.main_store.clone()); 123 | }; 124 | 125 | // If the matching key is empty, we return the original key routed to 126 | // the main store. This is because we do not want to allow empty keys 127 | // in the substore. 128 | if matching_key.is_empty() { 129 | (key, self.main_store.clone()) 130 | } else { 131 | (matching_key, config) 132 | } 133 | } 134 | 135 | /// Returns the truncated prefix and the corresponding `SubstoreConfig`. 136 | /// 137 | /// This method is used to implement prefix iteration. 138 | /// 139 | /// Unlike [`MultistoreConfig::route_key_str`], this method does not do any routing. 140 | /// It simply finds the substore matching the prefix, strip the prefix and delimiter, 141 | /// and returns the truncated prefix and the corresponding `SubstoreConfig`. 
142 | /// 143 | /// # Examples 144 | /// `prefix_a/key` -> `key` in `substore_a` 145 | /// `prefix_a` -> "" in `substore_a` 146 | /// `prefix_a/` -> "" in `substore_a` 147 | /// `nonexistent_prefix` -> "" in `main_store` 148 | pub fn match_prefix_str<'a>(&self, prefix: &'a str) -> (&'a str, Arc) { 149 | let config = self 150 | .find_substore(prefix.as_bytes()) 151 | .unwrap_or_else(|| self.main_store.clone()); 152 | 153 | let truncated_prefix = prefix 154 | .strip_prefix(&config.prefix) 155 | .expect("key has the prefix of the matched substore"); 156 | 157 | let truncated_prefix = truncated_prefix 158 | .strip_prefix('/') 159 | .unwrap_or(truncated_prefix); 160 | (truncated_prefix, config) 161 | } 162 | 163 | /// Returns the truncated prefix and the corresponding `SubstoreConfig`. 164 | /// 165 | /// Unlike [`MultistoreConfig::route_key_str`], this method does not do any routing. 166 | /// It simply finds the substore matching the prefix, strip the prefix and delimiter, 167 | /// and returns the truncated prefix and the corresponding `SubstoreConfig`. 168 | /// 169 | /// This method is used to implement prefix iteration. 
170 | /// 171 | /// # Examples 172 | /// `prefix_a/key` -> `key` in `substore_a` 173 | /// `prefix_a` -> "" in `substore_a` 174 | /// `prefix_a/` -> "" in `substore_a` 175 | /// `nonexistent_prefix` -> "" in `main_store` 176 | pub fn match_prefix_bytes<'a>(&self, prefix: &'a [u8]) -> (&'a [u8], Arc) { 177 | let config = self 178 | .find_substore(prefix) 179 | .unwrap_or_else(|| self.main_store.clone()); 180 | 181 | let truncated_prefix = prefix 182 | .strip_prefix(config.prefix.as_bytes()) 183 | .expect("key has the prefix of the matched substore"); 184 | 185 | let truncated_prefix = truncated_prefix 186 | .strip_prefix(b"/") 187 | .unwrap_or(truncated_prefix); 188 | (truncated_prefix, config) 189 | } 190 | } 191 | 192 | impl Default for MultistoreConfig { 193 | fn default() -> Self { 194 | Self { 195 | main_store: Arc::new(SubstoreConfig::new("")), 196 | substores: vec![], 197 | } 198 | } 199 | } 200 | 201 | /// Tracks the latest version of each substore, and wraps a `MultistoreConfig`. 
202 | #[derive(Default, Debug)] 203 | pub struct MultistoreCache { 204 | pub config: MultistoreConfig, 205 | pub substores: std::collections::BTreeMap, jmt::Version>, 206 | } 207 | 208 | impl Display for MultistoreCache { 209 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 210 | let mut s = String::new(); 211 | for (substore, version) in &self.substores { 212 | s.push_str(&format!("{}: {}\n", substore.prefix, version)); 213 | } 214 | write!(f, "{}", s) 215 | } 216 | } 217 | 218 | impl MultistoreCache { 219 | pub fn from_config(config: MultistoreConfig) -> Self { 220 | Self { 221 | config, 222 | substores: std::collections::BTreeMap::new(), 223 | } 224 | } 225 | 226 | pub fn set_version(&mut self, substore: Arc, version: jmt::Version) { 227 | self.substores.insert(substore, version); 228 | } 229 | 230 | pub fn get_version(&self, substore: &Arc) -> Option { 231 | self.substores.get(substore).cloned() 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /src/store/substore.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt::{Display, Formatter}, 3 | sync::Arc, 4 | }; 5 | 6 | use anyhow::Result; 7 | use borsh::BorshDeserialize; 8 | use jmt::{ 9 | storage::{HasPreimage, LeafNode, Node, NodeKey, TreeReader}, 10 | KeyHash, RootHash, 11 | }; 12 | use rocksdb::{ColumnFamily, IteratorMode, ReadOptions}; 13 | use tracing::Span; 14 | 15 | use crate::{snapshot::RocksDbSnapshot, Cache}; 16 | 17 | use jmt::storage::TreeWriter; 18 | 19 | /// Specifies the configuration of a substore, which is a prefixed subset of 20 | /// the main store with its own merkle tree, nonverifiable data, preimage index, etc. 21 | #[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] 22 | pub struct SubstoreConfig { 23 | /// The prefix of the substore. If empty, it is the root-level store config. 
24 | pub prefix: String, 25 | /// The prefix of the substore including the trailing slash. 26 | pub prefix_with_delimiter: String, 27 | /// name: "substore-{prefix}-jmt" 28 | /// role: persists the logical structure of the JMT 29 | /// maps: `storage::DbNodeKey` to `jmt::Node` 30 | // note: `DbNodeKey` is a newtype around `NodeKey` that serialize the key 31 | // so that it maps to a lexicographical ordering with ascending jmt::Version. 32 | cf_jmt: String, 33 | /// name: "susbstore-{prefix}-jmt-keys" 34 | /// role: JMT key index. 35 | /// maps: key preimages to their keyhash. 36 | cf_jmt_keys: String, 37 | /// name: "substore-{prefix}-jmt-values" 38 | /// role: stores the actual values that JMT leaves point to. 39 | /// maps: KeyHash || BE(version) to an `Option>` 40 | cf_jmt_values: String, 41 | /// name: "substore-{prefix}-jmt-keys-by-keyhash" 42 | /// role: index JMT keys by their keyhash. 43 | /// maps: keyhashes to their preimage. 44 | cf_jmt_keys_by_keyhash: String, 45 | /// name: "substore-{prefix}-nonverifiable" 46 | /// role: auxiliary data that is not part of our merkle tree, and thus not strictly 47 | /// part of consensus. 48 | /// maps: arbitrary keys to arbitrary values. 49 | cf_nonverifiable: String, 50 | } 51 | 52 | impl SubstoreConfig { 53 | pub fn new(prefix: impl ToString) -> Self { 54 | let prefix = prefix.to_string(); 55 | Self { 56 | cf_jmt: format!("substore-{}-jmt", prefix), 57 | cf_jmt_keys: format!("substore-{}-jmt-keys", prefix), 58 | cf_jmt_values: format!("substore-{}-jmt-values", prefix), 59 | cf_jmt_keys_by_keyhash: format!("substore-{}-jmt-keys-by-keyhash", prefix), 60 | cf_nonverifiable: format!("substore-{}-nonverifiable", prefix), 61 | prefix_with_delimiter: format!("{}/", prefix), 62 | prefix, 63 | } 64 | } 65 | 66 | /// Returns an iterator over all column families in this substore. 67 | /// Note(erwan): This is verbose, but very lightweight. 
68 | pub fn columns(&self) -> impl Iterator { 69 | std::iter::once(&self.cf_jmt) 70 | .chain(std::iter::once(&self.cf_jmt_keys)) 71 | .chain(std::iter::once(&self.cf_jmt_values)) 72 | .chain(std::iter::once(&self.cf_jmt_keys_by_keyhash)) 73 | .chain(std::iter::once(&self.cf_nonverifiable)) 74 | } 75 | 76 | pub fn cf_jmt<'s>(&self, db_handle: &'s Arc) -> &'s ColumnFamily { 77 | let column = self.cf_jmt.as_str(); 78 | db_handle.cf_handle(column).unwrap_or_else(|| { 79 | panic!( 80 | "jmt column family not found for prefix: {}, substore: {}", 81 | column, self.prefix 82 | ) 83 | }) 84 | } 85 | 86 | pub fn cf_jmt_values<'s>(&self, db_handle: &'s Arc) -> &'s ColumnFamily { 87 | let column = self.cf_jmt_values.as_str(); 88 | db_handle.cf_handle(column).unwrap_or_else(|| { 89 | panic!( 90 | "jmt-values column family not found for prefix: {}, substore: {}", 91 | column, self.prefix 92 | ) 93 | }) 94 | } 95 | 96 | pub fn cf_jmt_keys_by_keyhash<'s>(&self, db_handle: &'s Arc) -> &'s ColumnFamily { 97 | let column = self.cf_jmt_keys_by_keyhash.as_str(); 98 | db_handle.cf_handle(column).unwrap_or_else(|| { 99 | panic!( 100 | "jmt-keys-by-keyhash column family not found for prefix: {}, substore: {}", 101 | column, self.prefix 102 | ) 103 | }) 104 | } 105 | 106 | pub fn cf_jmt_keys<'s>(&self, db_handle: &'s Arc) -> &'s ColumnFamily { 107 | let column = self.cf_jmt_keys.as_str(); 108 | db_handle.cf_handle(column).unwrap_or_else(|| { 109 | panic!( 110 | "jmt-keys column family not found for prefix: {}, substore: {}", 111 | column, self.prefix 112 | ) 113 | }) 114 | } 115 | 116 | pub fn cf_nonverifiable<'s>(&self, db_handle: &'s Arc) -> &'s ColumnFamily { 117 | let column = self.cf_nonverifiable.as_str(); 118 | db_handle.cf_handle(column).unwrap_or_else(|| { 119 | panic!( 120 | "nonverifiable column family not found for prefix: {}, substore: {}", 121 | column, self.prefix 122 | ) 123 | }) 124 | } 125 | 126 | pub fn latest_version_from_db( 127 | &self, 128 | db_handle: &Arc, 129 | ) 
-> Result> { 130 | Ok(self 131 | .get_rightmost_leaf_from_db(db_handle)? 132 | .map(|(node_key, _)| node_key.version())) 133 | } 134 | 135 | pub fn latest_version_from_snapshot( 136 | &self, 137 | db_handle: &Arc, 138 | snapshot: &RocksDbSnapshot, 139 | ) -> Result> { 140 | Ok(self 141 | .get_rightmost_leaf_from_snapshot(db_handle, snapshot)? 142 | .map(|(node_key, _)| node_key.version())) 143 | } 144 | 145 | // TODO(erwan): having two different implementations of this is a bit weird and should 146 | // be refactored, or remodeled. The DB version is only used during initialization, before 147 | // a `Snapshot` is available. 148 | fn get_rightmost_leaf_from_db( 149 | &self, 150 | db_handle: &Arc, 151 | ) -> Result> { 152 | let cf_jmt = self.cf_jmt(db_handle); 153 | let mut iter = db_handle.raw_iterator_cf(cf_jmt); 154 | iter.seek_to_last(); 155 | 156 | if iter.valid() { 157 | let node_key = 158 | DbNodeKey::decode(iter.key().expect("all DB entries should have a key"))? 159 | .into_inner(); 160 | let node = 161 | Node::try_from_slice(iter.value().expect("all DB entries should have a value"))?; 162 | 163 | if let Node::Leaf(leaf_node) = node { 164 | return Ok(Some((node_key, leaf_node))); 165 | } 166 | } else { 167 | // There are no keys in the database 168 | } 169 | 170 | Ok(None) 171 | } 172 | 173 | fn get_rightmost_leaf_from_snapshot( 174 | &self, 175 | db_handle: &Arc, 176 | snapshot: &RocksDbSnapshot, 177 | ) -> Result> { 178 | let cf_jmt = self.cf_jmt(db_handle); 179 | let mut iter = snapshot.iterator_cf(cf_jmt, IteratorMode::End); 180 | let Some((raw_key, raw_value)) = iter.next().transpose()? else { 181 | return Ok(None); 182 | }; 183 | 184 | let node_key = DbNodeKey::decode(&raw_key)?.into_inner(); 185 | let Node::Leaf(leaf) = Node::try_from_slice(&raw_value)? 
else { 186 | return Ok(None); 187 | }; 188 | Ok(Some((node_key, leaf))) 189 | } 190 | } 191 | 192 | impl Display for SubstoreConfig { 193 | fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { 194 | write!(f, "SubstoreConfig(prefix={})", self.prefix) 195 | } 196 | } 197 | 198 | /// A read-only view into a substore at a specific state version. 199 | /// 200 | /// A [`SubstoreSnapshot`] is lightweight and cheap to create, it can be 201 | /// instantiated on-demand when a read-only view of a substore's state is 202 | /// needed. 203 | pub struct SubstoreSnapshot { 204 | pub(crate) config: Arc, 205 | pub(crate) rocksdb_snapshot: Arc, 206 | pub(crate) version: jmt::Version, 207 | pub(crate) db: Arc, 208 | } 209 | 210 | impl SubstoreSnapshot { 211 | pub fn root_hash(&self) -> Result { 212 | let version = self.version(); 213 | let tree = jmt::Sha256Jmt::new(self); 214 | Ok(tree 215 | .get_root_hash_option(version)? 216 | .unwrap_or(jmt::RootHash([0; 32]))) 217 | } 218 | 219 | pub fn version(&self) -> jmt::Version { 220 | self.version 221 | } 222 | 223 | /// Returns some value corresponding to the key, along with an ICS23 existence proof 224 | /// up to the current JMT root hash. If the key is not present, returns `None` and a 225 | /// non-existence proof. 226 | pub(crate) fn get_with_proof( 227 | &self, 228 | key: Vec, 229 | ) -> Result<(Option>, ics23::CommitmentProof)> { 230 | let version = self.version(); 231 | let tree = jmt::Sha256Jmt::new(self); 232 | tree.get_with_ics23_proof(key, version) 233 | } 234 | 235 | /// Helper function used by `get_raw` and `prefix_raw`. 236 | /// 237 | /// Reads from the JMT will fail if the root is missing; this method 238 | /// special-cases the empty tree case so that reads on an empty tree just 239 | /// return None. 
240 | pub fn get_jmt(&self, key: jmt::KeyHash) -> Result>> { 241 | let tree = jmt::Sha256Jmt::new(self); 242 | match tree.get(key, self.version()) { 243 | Ok(Some(value)) => { 244 | tracing::trace!(substore = ?self.config.prefix, version = ?self.version(), ?key, value = ?hex::encode(&value), "read from tree"); 245 | Ok(Some(value)) 246 | } 247 | Ok(None) => { 248 | tracing::trace!(substore = ?self.config.prefix, version = ?self.version(), ?key, "key not found in tree"); 249 | Ok(None) 250 | } 251 | // This allows for using the Overlay on an empty database without 252 | // errors We only skip the `MissingRootError` if the `version` is 253 | // `u64::MAX`, the pre-genesis version. Otherwise, a missing root 254 | // actually does indicate a problem. 255 | Err(e) 256 | if e.downcast_ref::().is_some() 257 | && self.version() == u64::MAX => 258 | { 259 | tracing::trace!(substore = ?self.config.prefix, version = ?self.version(), "no data available at this version"); 260 | Ok(None) 261 | } 262 | Err(e) => Err(e), 263 | } 264 | } 265 | } 266 | 267 | impl TreeReader for SubstoreSnapshot { 268 | /// Gets a value by identifier, returning the newest value whose version is *less than or 269 | /// equal to* the specified version. Returns `None` if the value does not exist. 270 | fn get_value_option( 271 | &self, 272 | max_version: jmt::Version, 273 | key_hash: KeyHash, 274 | ) -> Result> { 275 | let cf_jmt_values = self.config.cf_jmt_values(&self.db); 276 | 277 | // Prefix ranges exclude the upper bound in the iterator result. 278 | // This means that when requesting the largest possible version, there 279 | // is no way to specify a range that is inclusive of `u64::MAX`. 280 | if max_version == u64::MAX { 281 | let k = VersionedKeyHash { 282 | version: u64::MAX, 283 | key_hash, 284 | }; 285 | 286 | if let Some(v) = self.rocksdb_snapshot.get_cf(cf_jmt_values, k.encode())? 
{ 287 | let maybe_value: Option> = BorshDeserialize::try_from_slice(v.as_ref())?; 288 | return Ok(maybe_value); 289 | } 290 | } 291 | 292 | let mut lower_bound = key_hash.0.to_vec(); 293 | lower_bound.extend_from_slice(&0u64.to_be_bytes()); 294 | 295 | let mut upper_bound = key_hash.0.to_vec(); 296 | // The upper bound is excluded from the iteration results. 297 | upper_bound.extend_from_slice(&(max_version.saturating_add(1)).to_be_bytes()); 298 | 299 | let mut readopts = ReadOptions::default(); 300 | readopts.set_iterate_lower_bound(lower_bound); 301 | readopts.set_iterate_upper_bound(upper_bound); 302 | let mut iterator = 303 | self.rocksdb_snapshot 304 | .iterator_cf_opt(cf_jmt_values, readopts, IteratorMode::End); 305 | 306 | let Some(tuple) = iterator.next() else { 307 | return Ok(None); 308 | }; 309 | 310 | let (_key, v) = tuple?; 311 | let maybe_value = BorshDeserialize::try_from_slice(v.as_ref())?; 312 | Ok(maybe_value) 313 | } 314 | 315 | /// Gets node given a node key. Returns `None` if the node does not exist. 316 | fn get_node_option(&self, node_key: &NodeKey) -> Result> { 317 | let db_node_key = DbNodeKey::from(node_key.clone()); 318 | tracing::trace!(?node_key); 319 | 320 | let cf_jmt = self.config.cf_jmt(&self.db); 321 | let value = self 322 | .rocksdb_snapshot 323 | .get_cf(cf_jmt, db_node_key.encode()?)? 324 | .map(|db_slice| Node::try_from_slice(&db_slice)) 325 | .transpose()?; 326 | 327 | tracing::trace!(?node_key, ?value); 328 | Ok(value) 329 | } 330 | 331 | fn get_rightmost_leaf(&self) -> Result> { 332 | let cf_jmt = self.config.cf_jmt(&self.db); 333 | let mut iter = self.rocksdb_snapshot.raw_iterator_cf(cf_jmt); 334 | iter.seek_to_last(); 335 | 336 | if iter.valid() { 337 | let node_key = 338 | DbNodeKey::decode(iter.key().expect("all DB entries should have a key"))? 
339 | .into_inner(); 340 | let node = 341 | Node::try_from_slice(iter.value().expect("all DB entries should have a value"))?; 342 | 343 | if let Node::Leaf(leaf_node) = node { 344 | return Ok(Some((node_key, leaf_node))); 345 | } 346 | } else { 347 | // There are no keys in the database 348 | } 349 | 350 | Ok(None) 351 | } 352 | } 353 | 354 | impl HasPreimage for SubstoreSnapshot { 355 | fn preimage(&self, key_hash: KeyHash) -> Result>> { 356 | let cf_jmt_keys_by_keyhash = self.config.cf_jmt_keys_by_keyhash(&self.db); 357 | 358 | Ok(self 359 | .rocksdb_snapshot 360 | .get_cf(cf_jmt_keys_by_keyhash, key_hash.0)?) 361 | } 362 | } 363 | 364 | pub struct SubstoreStorage { 365 | pub(crate) substore_snapshot: SubstoreSnapshot, 366 | } 367 | 368 | impl SubstoreStorage { 369 | pub async fn commit( 370 | self, 371 | cache: Cache, 372 | mut write_batch: rocksdb::WriteBatch, 373 | write_version: jmt::Version, 374 | perform_migration: bool, 375 | ) -> Result<(RootHash, rocksdb::WriteBatch)> { 376 | let span = Span::current(); 377 | 378 | tokio::task 379 | ::spawn_blocking(move || { 380 | span.in_scope(|| { 381 | let jmt = jmt::Sha256Jmt::new(&self.substore_snapshot); 382 | let unwritten_changes: Vec<_> = cache 383 | .unwritten_changes 384 | .into_iter() 385 | .map(|(key, some_value)| (KeyHash::with::(&key), key, some_value)) 386 | .collect(); 387 | 388 | let cf_jmt_keys = self.substore_snapshot.config.cf_jmt_keys(&self.substore_snapshot.db); 389 | let cf_jmt_keys_by_keyhash = self.substore_snapshot.config.cf_jmt_keys_by_keyhash(&self.substore_snapshot.db); 390 | let cf_jmt = self.substore_snapshot.config.cf_jmt(&self.substore_snapshot.db); 391 | let cf_jmt_values = self.substore_snapshot.config.cf_jmt_values(&self.substore_snapshot.db); 392 | 393 | /* Keyhash and pre-image indices */ 394 | for (keyhash, key_preimage, value) in unwritten_changes.iter() { 395 | match value { 396 | Some(_) => { /* Key inserted, or updated, so we add it to the keyhash index */ 397 | 
write_batch.put_cf(cf_jmt_keys, key_preimage, keyhash.0); 398 | write_batch 399 | .put_cf(cf_jmt_keys_by_keyhash, keyhash.0, key_preimage) 400 | } 401 | None => { /* Key deleted, so we delete it from the preimage and keyhash index entries */ 402 | write_batch.delete_cf(cf_jmt_keys, key_preimage); 403 | write_batch.delete_cf(cf_jmt_keys_by_keyhash, keyhash.0); 404 | } 405 | }; 406 | } 407 | 408 | // We only track the keyhash and possible values; at the time of writing, 409 | // `rustfmt` panics on inlining the closure, so we use a helper function to skip the key. 410 | let skip_key = |(keyhash, _key, some_value)| (keyhash, some_value); 411 | 412 | let (root_hash, batch) = if perform_migration { 413 | // TODO(erwan): this should be feature-gated behind `migration` 414 | // activating `jmt/migration` more judiciously. 415 | jmt.append_value_set(unwritten_changes.into_iter().map(skip_key), write_version)? 416 | } else { 417 | jmt.put_value_set(unwritten_changes.into_iter().map(skip_key), write_version)? 
418 | }; 419 | 420 | /* JMT nodes and values */ 421 | for (node_key, node) in batch.node_batch.nodes() { 422 | let db_node_key_bytes= DbNodeKey::encode_from_node_key(node_key)?; 423 | let value_bytes = borsh::to_vec(node)?; 424 | tracing::trace!(?db_node_key_bytes, value_bytes = ?hex::encode(&value_bytes)); 425 | write_batch.put_cf(cf_jmt, db_node_key_bytes, value_bytes); 426 | } 427 | 428 | 429 | for ((version, key_hash), some_value) in batch.node_batch.values() { 430 | let key_bytes = VersionedKeyHash::encode_from_keyhash(key_hash, version); 431 | let value_bytes = borsh::to_vec(some_value)?; 432 | tracing::trace!(?key_bytes, value_bytes = ?hex::encode(&value_bytes)); 433 | write_batch.put_cf(cf_jmt_values, key_bytes, value_bytes); 434 | } 435 | 436 | tracing::trace!(?root_hash, "accumulated node changes in the write batch"); 437 | 438 | 439 | for (k, v) in cache.nonverifiable_changes.into_iter() { 440 | let cf_nonverifiable = self.substore_snapshot.config.cf_nonverifiable(&self.substore_snapshot.db); 441 | match v { 442 | Some(v) => { 443 | tracing::trace!(key = ?crate::EscapedByteSlice(&k), value = ?crate::EscapedByteSlice(&v), "put nonverifiable key"); 444 | write_batch.put_cf(cf_nonverifiable, k, &v); 445 | } 446 | None => { 447 | write_batch.delete_cf(cf_nonverifiable, k); 448 | } 449 | }; 450 | } 451 | 452 | Ok((root_hash, write_batch)) 453 | }) 454 | }) 455 | .await? 456 | } 457 | } 458 | 459 | impl TreeWriter for SubstoreStorage { 460 | fn write_node_batch(&self, _node_batch: &jmt::storage::NodeBatch) -> Result<()> { 461 | // The "write"-part of the `TreeReader + TreeWriter` jmt architecture does not work 462 | // well with a deferred write strategy. 463 | // What we would like to do is to accumulate the changes in a write batch, and then commit 464 | // them all at once. This isn't possible to do easily because the `TreeWriter` trait 465 | // rightfully does not expose RocksDB-specific types in its API. 
466 | // 467 | // The alternative is to use interior mutability but the semantics become 468 | // so implementation specific that we lose the benefits of the trait abstraction. 469 | unimplemented!("We inline the tree writing logic in the `commit` method") 470 | } 471 | } 472 | 473 | /// An ordered node key is a node key that is encoded in a way that 474 | /// preserves the order of the node keys in the database. 475 | pub struct DbNodeKey(pub NodeKey); 476 | 477 | impl DbNodeKey { 478 | pub fn from(node_key: NodeKey) -> Self { 479 | DbNodeKey(node_key) 480 | } 481 | 482 | pub fn into_inner(self) -> NodeKey { 483 | self.0 484 | } 485 | 486 | pub fn encode(&self) -> Result> { 487 | Self::encode_from_node_key(&self.0) 488 | } 489 | 490 | pub fn encode_from_node_key(node_key: &NodeKey) -> Result> { 491 | let mut bytes = Vec::new(); 492 | bytes.extend_from_slice(&node_key.version().to_be_bytes()); // encode version as big-endian 493 | let rest = borsh::to_vec(node_key)?; 494 | bytes.extend_from_slice(&rest); 495 | Ok(bytes) 496 | } 497 | 498 | pub fn decode(bytes: impl AsRef<[u8]>) -> Result { 499 | if bytes.as_ref().len() < 8 { 500 | anyhow::bail!("byte slice is too short") 501 | } 502 | // Ignore the bytes that encode the version 503 | let node_key_slice = bytes.as_ref()[8..].to_vec(); 504 | let node_key = borsh::BorshDeserialize::try_from_slice(&node_key_slice)?; 505 | Ok(DbNodeKey(node_key)) 506 | } 507 | } 508 | 509 | /// Represent a JMT key hash at a specific `jmt::Version` 510 | /// This is used to index the JMT values in RocksDB. 
#[derive(Clone, Debug)]
pub struct VersionedKeyHash {
    pub key_hash: KeyHash,
    pub version: jmt::Version,
}

impl VersionedKeyHash {
    pub fn encode(&self) -> Vec<u8> {
        VersionedKeyHash::encode_from_keyhash(&self.key_hash, &self.version)
    }

    /// Encodes as `key_hash || BE(version)`: all versions of a key hash are
    /// adjacent in RocksDB, ordered by ascending version.
    pub fn encode_from_keyhash(key_hash: &KeyHash, version: &jmt::Version) -> Vec<u8> {
        let mut buf: Vec<u8> = key_hash.0.to_vec();
        buf.extend_from_slice(&version.to_be_bytes());
        buf
    }

    /// Decodes a 40-byte `key_hash || BE(version)` buffer; any other length
    /// is rejected.
    #[allow(dead_code)]
    pub fn decode(buf: Vec<u8>) -> Result<VersionedKeyHash> {
        if buf.len() != 40 {
            return Err(anyhow::anyhow!(
                "could not decode buffer into VersionedKey (invalid size)"
            ));
        }

        let raw_key_hash: [u8; 32] = buf[0..32]
            .try_into()
            .expect("buffer is at least 40 bytes wide");
        let key_hash = KeyHash(raw_key_hash);

        let raw_version: [u8; 8] = buf[32..40]
            .try_into()
            .expect("buffer is at least 40 bytes wide");
        let version: u64 = u64::from_be_bytes(raw_version);

        Ok(VersionedKeyHash { version, key_hash })
    }
}
-------------------------------------------------------------------------------- /src/tests/delta.rs: --------------------------------------------------------------------------------
/*
#[tokio::test]
async fn garden_of_forking_paths() -> anyhow::Result<()> {
    tracing_subscriber::fmt::init();
    let storage = TempStorage::new().await?;

    let mut state_init = storage.latest_snapshot();

    // TODO: do we still want to have StateTransaction ?
    // what if we just made StateDelta be StateTransaction ?
    // what are the downsides? forced allocation for range queries?
    // where do we get the events out?
13 | let mut tx = state_init.begin_transaction(); 14 | tx.put_raw("base".to_owned(), b"base".to_vec()); 15 | tx.apply(); 16 | storage.commit(state_init).await?; 17 | 18 | let mut state = storage.latest_snapshot(); 19 | let mut tx = state.begin_transaction(); 20 | 21 | // We can create a StateDelta from a borrow, it will take ownership of the borrow while the family is live 22 | let mut delta = StateDelta::new(&mut tx); 23 | delta.put_raw("delta".to_owned(), b"delta".to_vec()); 24 | 25 | // We can also nest StateDeltas -- unlike fork, this will only flatten down to the nesting point. 26 | let mut d2 = StateDelta::new(&mut delta); 27 | 28 | let mut delta_a = d2.fork(); 29 | let mut delta_b = d2.fork(); 30 | delta_a.put_raw("delta".to_owned(), b"delta_a".to_vec()); 31 | delta_b.put_raw("delta".to_owned(), b"delta_b".to_vec()); 32 | let mut delta_a_base = delta_a.fork(); 33 | let mut delta_b_base = delta_b.fork(); 34 | delta_a_base.delete("base".to_owned()); 35 | delta_b_base.delete("base".to_owned()); 36 | 37 | assert_eq!(delta_a.get_raw("base").await?, Some(b"base".to_vec())); 38 | assert_eq!(delta_a.get_raw("base").await?, Some(b"base".to_vec())); 39 | assert_eq!(delta_a_base.get_raw("base").await?, None); 40 | assert_eq!(delta_b_base.get_raw("base").await?, None); 41 | 42 | assert_eq!(delta_a.get_raw("delta").await?, Some(b"delta_a".to_vec())); 43 | assert_eq!( 44 | delta_a_base.get_raw("delta").await?, 45 | Some(b"delta_a".to_vec()) 46 | ); 47 | assert_eq!(delta_b.get_raw("delta").await?, Some(b"delta_b".to_vec())); 48 | assert_eq!( 49 | delta_b_base.get_raw("delta").await?, 50 | Some(b"delta_b".to_vec()) 51 | ); 52 | 53 | // Pick one we like and apply it, releasing the &mut delta reference... 54 | // Note: flattens delta_b_base -> delta_b -> delta and stops! 55 | delta_b_base.apply(); 56 | // ... so we can read from delta again. 
57 | assert_eq!(delta.get_raw("base").await?, None); 58 | assert_eq!(delta.get_raw("delta").await?, Some(b"delta_b".to_vec())); 59 | 60 | delta.apply(); 61 | tx.apply(); 62 | storage.commit(state).await?; 63 | 64 | let state = storage.latest_snapshot(); 65 | assert_eq!(state.get_raw("base").await?, None); 66 | assert_eq!(state.get_raw("delta").await?, Some(b"delta_b".to_vec())); 67 | 68 | Ok(()) 69 | } 70 | 71 | #[tokio::test] 72 | async fn simple_flow() -> anyhow::Result<()> { 73 | //tracing_subscriber::fmt::init(); 74 | let tmpdir = tempfile::tempdir()?; 75 | 76 | // Initialize an empty Storage in the new directory 77 | let storage = Storage::load(tmpdir.path().to_owned()).await?; 78 | 79 | // Version -1 to Version 0 writes 80 | // 81 | // tx00: test => test 82 | // tx00: c/aa => 0 [object store] 83 | // tx00: c/ab => 1 [object store] 84 | // tx00: c/ac => 2 [object store] 85 | // tx00: c/ad => 3 [object store] 86 | // tx00: iA => A [nonverifiable store] 87 | // tx00: iC => C [nonverifiable store] 88 | // tx00: iF => F [nonverifiable store] 89 | // tx00: iD => D [nonverifiable store] 90 | // tx01: a/aa => aa 91 | // tx01: a/aaa => aaa 92 | // tx01: a/ab => ab 93 | // tx01: a/z => z 94 | // tx01: c/ab => 10 [object store] 95 | // tx01: c/ac => [deleted] [object store] 96 | // 97 | // Version 0 to Version 1 writes 98 | // tx10: test => [deleted] 99 | // tx10: a/aaa => [deleted] 100 | // tx10: a/c => c 101 | // tx10: iB => B [nonverifiable store] 102 | // tx11: a/ab => ab2 103 | // tx11: iD => [deleted] nonverifiable store] 104 | 105 | let mut state_init = StateDelta::new(storage.latest_snapshot()); 106 | // Check that reads on an empty state return Ok(None) 107 | assert_eq!(state_init.get_raw("test").await?, None); 108 | assert_eq!(state_init.get_raw("a/aa").await?, None); 109 | 110 | // Create tx00 111 | let mut tx00 = StateDelta::new(&mut state_init); 112 | tx00.put_raw("test".to_owned(), b"test".to_vec()); 113 | tx00.object_put("c/aa", 0u64); 114 | 
tx00.object_put("c/ab", 1u64); 115 | tx00.object_put("c/ac", 2u64); 116 | tx00.object_put("c/ad", 3u64); 117 | tx00.nonverifiable_put_raw(b"iA".to_vec(), b"A".to_vec()); 118 | tx00.nonverifiable_put_raw(b"iC".to_vec(), b"C".to_vec()); 119 | tx00.nonverifiable_put_raw(b"iF".to_vec(), b"F".to_vec()); 120 | tx00.nonverifiable_put_raw(b"iD".to_vec(), b"D".to_vec()); 121 | 122 | // Check reads against tx00: 123 | // This is present in tx00 124 | assert_eq!(tx00.get_raw("test").await?, Some(b"test".to_vec())); 125 | // This is missing in tx00 and state_init and tree is empty 126 | assert_eq!(tx00.get_raw("a/aa").await?, None); 127 | // Present in tx00 object store 128 | assert_eq!(tx00.object_get("c/aa"), Some(0u64)); 129 | assert_eq!(tx00.object_get("c/ab"), Some(1u64)); 130 | assert_eq!(tx00.object_get("c/ac"), Some(2u64)); 131 | assert_eq!(tx00.object_get("c/ad"), Some(3u64)); 132 | // Present in tx00 object store but requested with wrong type 133 | assert_eq!(tx00.object_get::("c/aa"), None); 134 | // Missing in tx00 object store 135 | assert_eq!(tx00.object_get::("nonexist"), None); 136 | // Nonconsensus range checks 137 | let mut range = tx00.nonverifiable_prefix_raw(b"i"); 138 | assert_eq!( 139 | range.next().await.transpose()?, 140 | Some((b"iA".to_vec(), b"A".to_vec())) 141 | ); 142 | assert_eq!( 143 | range.next().await.transpose()?, 144 | Some((b"iC".to_vec(), b"C".to_vec())) 145 | ); 146 | assert_eq!( 147 | range.next().await.transpose()?, 148 | Some((b"iD".to_vec(), b"D".to_vec())) 149 | ); 150 | assert_eq!( 151 | range.next().await.transpose()?, 152 | Some((b"iF".to_vec(), b"F".to_vec())) 153 | ); 154 | assert_eq!(range.next().await.transpose()?, None); 155 | std::mem::drop(range); 156 | 157 | // Now apply the transaction to state_init 158 | tx00.apply(); 159 | assert_eq!(state_init.get_raw("test").await?, Some(b"test".to_vec())); 160 | assert_eq!(state_init.get_raw("a/aa").await?, None); 161 | // Present in state_init object store 162 | 
assert_eq!(state_init.object_get("c/aa"), Some(0u64)); 163 | assert_eq!(state_init.object_get("c/ab"), Some(1u64)); 164 | assert_eq!(state_init.object_get("c/ac"), Some(2u64)); 165 | assert_eq!(state_init.object_get("c/ad"), Some(3u64)); 166 | // Present in state_init object store but requested with wrong type 167 | assert_eq!(state_init.object_get::("c/aa"), None); 168 | // Missing in state_init object store 169 | assert_eq!(state_init.object_get::("nonexist"), None); 170 | // Nonconsensus range checks 171 | let mut range = state_init.nonverifiable_prefix_raw(b"i"); 172 | assert_eq!( 173 | range.next().await.transpose()?, 174 | Some((b"iA".to_vec(), b"A".to_vec())) 175 | ); 176 | assert_eq!( 177 | range.next().await.transpose()?, 178 | Some((b"iC".to_vec(), b"C".to_vec())) 179 | ); 180 | assert_eq!( 181 | range.next().await.transpose()?, 182 | Some((b"iD".to_vec(), b"D".to_vec())) 183 | ); 184 | assert_eq!( 185 | range.next().await.transpose()?, 186 | Some((b"iF".to_vec(), b"F".to_vec())) 187 | ); 188 | assert_eq!(range.next().await.transpose()?, None); 189 | std::mem::drop(range); 190 | 191 | // Create a transaction writing the other keys. 
192 | let mut tx01 = StateDelta::new(&mut state_init); 193 | tx01.put_raw("a/aa".to_owned(), b"aa".to_vec()); 194 | tx01.put_raw("a/aaa".to_owned(), b"aaa".to_vec()); 195 | tx01.put_raw("a/ab".to_owned(), b"ab".to_vec()); 196 | tx01.put_raw("a/z".to_owned(), b"z".to_vec()); 197 | tx01.object_put("c/ab", 10u64); 198 | tx01.object_delete("c/ac"); 199 | 200 | // Check reads against tx01: 201 | // This is missing in tx01 and reads through to state_init 202 | assert_eq!(tx01.get_raw("test").await?, Some(b"test".to_vec())); 203 | // This is present in tx01 204 | assert_eq!(tx01.get_raw("a/aa").await?, Some(b"aa".to_vec())); 205 | assert_eq!(tx01.get_raw("a/aaa").await?, Some(b"aaa".to_vec())); 206 | assert_eq!(tx01.get_raw("a/ab").await?, Some(b"ab".to_vec())); 207 | assert_eq!(tx01.get_raw("a/z").await?, Some(b"z".to_vec())); 208 | // This is missing in tx01 and in state_init 209 | assert_eq!(tx01.get_raw("a/c").await?, None); 210 | let mut range = tx01.prefix_raw("a/"); 211 | let mut range_keys = tx01.prefix_keys("a/"); 212 | assert_eq!( 213 | range.next().await.transpose()?, 214 | Some(("a/aa".to_owned(), b"aa".to_vec())) 215 | ); 216 | assert_eq!( 217 | range_keys.next().await.transpose()?, 218 | Some("a/aa".to_owned()) 219 | ); 220 | assert_eq!( 221 | range.next().await.transpose()?, 222 | Some(("a/aaa".to_owned(), b"aaa".to_vec())) 223 | ); 224 | assert_eq!( 225 | range_keys.next().await.transpose()?, 226 | Some("a/aaa".to_owned()) 227 | ); 228 | assert_eq!( 229 | range.next().await.transpose()?, 230 | Some(("a/ab".to_owned(), b"ab".to_vec())) 231 | ); 232 | assert_eq!( 233 | range_keys.next().await.transpose()?, 234 | Some("a/ab".to_owned()) 235 | ); 236 | assert_eq!( 237 | range.next().await.transpose()?, 238 | Some(("a/z".to_owned(), b"z".to_vec())) 239 | ); 240 | assert_eq!(range_keys.next().await.transpose()?, Some("a/z".to_owned())); 241 | assert_eq!(range.next().await.transpose()?, None); 242 | assert_eq!(range_keys.next().await.transpose()?, None); 243 | 
std::mem::drop(range); 244 | std::mem::drop(range_keys); 245 | 246 | // Now apply the transaction to state_init 247 | tx01.apply(); 248 | 249 | // Check reads against state_init: 250 | // This is present in state_init 251 | assert_eq!(state_init.get_raw("test").await?, Some(b"test".to_vec())); 252 | assert_eq!(state_init.get_raw("a/aa").await?, Some(b"aa".to_vec())); 253 | assert_eq!(state_init.get_raw("a/aaa").await?, Some(b"aaa".to_vec())); 254 | assert_eq!(state_init.get_raw("a/ab").await?, Some(b"ab".to_vec())); 255 | assert_eq!(state_init.get_raw("a/z").await?, Some(b"z".to_vec())); 256 | // This is missing in state_init 257 | assert_eq!(state_init.get_raw("a/c").await?, None); 258 | let mut range = state_init.prefix_raw("a/"); 259 | let mut range_keys = state_init.prefix_keys("a/"); 260 | assert_eq!( 261 | range.next().await.transpose()?, 262 | Some(("a/aa".to_owned(), b"aa".to_vec())) 263 | ); 264 | assert_eq!( 265 | range_keys.next().await.transpose()?, 266 | Some("a/aa".to_owned()) 267 | ); 268 | assert_eq!( 269 | range.next().await.transpose()?, 270 | Some(("a/aaa".to_owned(), b"aaa".to_vec())) 271 | ); 272 | assert_eq!( 273 | range_keys.next().await.transpose()?, 274 | Some("a/aaa".to_owned()) 275 | ); 276 | assert_eq!( 277 | range.next().await.transpose()?, 278 | Some(("a/ab".to_owned(), b"ab".to_vec())) 279 | ); 280 | assert_eq!( 281 | range_keys.next().await.transpose()?, 282 | Some("a/ab".to_owned()) 283 | ); 284 | assert_eq!( 285 | range.next().await.transpose()?, 286 | Some(("a/z".to_owned(), b"z".to_vec())) 287 | ); 288 | assert_eq!(range_keys.next().await.transpose()?, Some("a/z".to_owned())); 289 | assert_eq!(range.next().await.transpose()?, None); 290 | assert_eq!(range_keys.next().await.transpose()?, None); 291 | std::mem::drop(range); 292 | std::mem::drop(range_keys); 293 | 294 | // Now commit state_init to storage 295 | storage.commit_delta(state_init).await?; 296 | 297 | // Now we have version 0. 
298 | let mut state0 = StateDelta::new(storage.latest_snapshot()); 299 | //assert_eq!(state0.version(), 0); 300 | // Check reads against state0: 301 | // This is missing in state0 and present in JMT 302 | assert_eq!(state0.get_raw("test").await?, Some(b"test".to_vec())); 303 | assert_eq!(state0.get_raw("a/aa").await?, Some(b"aa".to_vec())); 304 | assert_eq!(state0.get_raw("a/aaa").await?, Some(b"aaa".to_vec())); 305 | assert_eq!(state0.get_raw("a/ab").await?, Some(b"ab".to_vec())); 306 | assert_eq!(state0.get_raw("a/z").await?, Some(b"z".to_vec())); 307 | // This is missing in state0 and missing in JMT 308 | assert_eq!(state0.get_raw("a/c").await?, None); 309 | let mut range = state0.prefix_raw("a/"); 310 | assert_eq!( 311 | range.next().await.transpose()?, 312 | Some(("a/aa".to_owned(), b"aa".to_vec())) 313 | ); 314 | assert_eq!( 315 | range.next().await.transpose()?, 316 | Some(("a/aaa".to_owned(), b"aaa".to_vec())) 317 | ); 318 | assert_eq!( 319 | range.next().await.transpose()?, 320 | Some(("a/ab".to_owned(), b"ab".to_vec())) 321 | ); 322 | assert_eq!( 323 | range.next().await.transpose()?, 324 | Some(("a/z".to_owned(), b"z".to_vec())) 325 | ); 326 | assert_eq!(range.next().await.transpose()?, None); 327 | std::mem::drop(range); 328 | // Nonconsensus range checks 329 | let mut range = state0.nonverifiable_prefix_raw(b"i"); 330 | assert_eq!( 331 | range.next().await.transpose()?, 332 | Some((b"iA".to_vec(), b"A".to_vec())) 333 | ); 334 | assert_eq!( 335 | range.next().await.transpose()?, 336 | Some((b"iC".to_vec(), b"C".to_vec())) 337 | ); 338 | assert_eq!( 339 | range.next().await.transpose()?, 340 | Some((b"iD".to_vec(), b"D".to_vec())) 341 | ); 342 | assert_eq!( 343 | range.next().await.transpose()?, 344 | Some((b"iF".to_vec(), b"F".to_vec())) 345 | ); 346 | assert_eq!(range.next().await.transpose()?, None); 347 | std::mem::drop(range); 348 | 349 | // Start building a transaction 350 | let mut tx10 = StateDelta::new(&mut state0); 351 | 
tx10.delete("test".to_owned()); 352 | tx10.delete("a/aaa".to_owned()); 353 | tx10.put_raw("a/c".to_owned(), b"c".to_vec()); 354 | tx10.nonverifiable_put_raw(b"iB".to_vec(), b"B".to_vec()); 355 | 356 | // Check reads against tx10: 357 | // This is deleted in tx10, missing in state0, present in JMT 358 | assert_eq!(tx10.get_raw("test").await?, None); 359 | assert_eq!(tx10.get_raw("a/aaa").await?, None); 360 | // This is missing in tx10, missing in state0, present in JMT 361 | assert_eq!(tx10.get_raw("a/aa").await?, Some(b"aa".to_vec())); 362 | assert_eq!(tx10.get_raw("a/ab").await?, Some(b"ab".to_vec())); 363 | assert_eq!(tx10.get_raw("a/z").await?, Some(b"z".to_vec())); 364 | // This is present in tx10, missing in state0, missing in JMT 365 | assert_eq!(tx10.get_raw("a/c").await?, Some(b"c".to_vec())); 366 | let mut range = tx10.prefix_raw("a/"); 367 | assert_eq!( 368 | range.next().await.transpose()?, 369 | Some(("a/aa".to_owned(), b"aa".to_vec())) 370 | ); 371 | assert_eq!( 372 | range.next().await.transpose()?, 373 | Some(("a/ab".to_owned(), b"ab".to_vec())) 374 | ); 375 | assert_eq!( 376 | range.next().await.transpose()?, 377 | Some(("a/c".to_owned(), b"c".to_vec())) 378 | ); 379 | assert_eq!( 380 | range.next().await.transpose()?, 381 | Some(("a/z".to_owned(), b"z".to_vec())) 382 | ); 383 | assert_eq!(range.next().await.transpose()?, None); 384 | std::mem::drop(range); 385 | // Nonconsensus range checks 386 | let mut range = tx10.nonverifiable_prefix_raw(b"i"); 387 | assert_eq!( 388 | range.next().await.transpose()?, 389 | Some((b"iA".to_vec(), b"A".to_vec())) 390 | ); 391 | assert_eq!( 392 | range.next().await.transpose()?, 393 | Some((b"iB".to_vec(), b"B".to_vec())) 394 | ); 395 | assert_eq!( 396 | range.next().await.transpose()?, 397 | Some((b"iC".to_vec(), b"C".to_vec())) 398 | ); 399 | assert_eq!( 400 | range.next().await.transpose()?, 401 | Some((b"iD".to_vec(), b"D".to_vec())) 402 | ); 403 | assert_eq!( 404 | range.next().await.transpose()?, 405 | 
Some((b"iF".to_vec(), b"F".to_vec())) 406 | ); 407 | assert_eq!(range.next().await.transpose()?, None); 408 | std::mem::drop(range); 409 | 410 | // Apply tx10 to state0 411 | tx10.apply(); 412 | 413 | // Check reads against state0 414 | // This is deleted in state0, present in JMT 415 | assert_eq!(state0.get_raw("test").await?, None); 416 | assert_eq!(state0.get_raw("a/aaa").await?, None); 417 | // This is missing in state0, present in JMT 418 | assert_eq!(state0.get_raw("a/aa").await?, Some(b"aa".to_vec())); 419 | assert_eq!(state0.get_raw("a/ab").await?, Some(b"ab".to_vec())); 420 | assert_eq!(state0.get_raw("a/z").await?, Some(b"z".to_vec())); 421 | // This is present in state0, missing in JMT 422 | assert_eq!(state0.get_raw("a/c").await?, Some(b"c".to_vec())); 423 | let mut range = state0.prefix_raw("a/"); 424 | assert_eq!( 425 | range.next().await.transpose()?, 426 | Some(("a/aa".to_owned(), b"aa".to_vec())) 427 | ); 428 | assert_eq!( 429 | range.next().await.transpose()?, 430 | Some(("a/ab".to_owned(), b"ab".to_vec())) 431 | ); 432 | assert_eq!( 433 | range.next().await.transpose()?, 434 | Some(("a/c".to_owned(), b"c".to_vec())) 435 | ); 436 | assert_eq!( 437 | range.next().await.transpose()?, 438 | Some(("a/z".to_owned(), b"z".to_vec())) 439 | ); 440 | assert_eq!(range.next().await.transpose()?, None); 441 | std::mem::drop(range); 442 | 443 | // Start building another transaction 444 | let mut tx11 = StateDelta::new(&mut state0); 445 | tx11.put_raw("a/ab".to_owned(), b"ab2".to_vec()); 446 | tx11.nonverifiable_delete(b"iD".to_vec()); 447 | 448 | // Check reads against tx11: 449 | // This is present in tx11, missing in state0, present in JMT 450 | assert_eq!(tx11.get_raw("a/ab").await?, Some(b"ab2".to_vec())); 451 | // This is missing in tx11, deleted in state0, present in JMT 452 | assert_eq!(tx11.get_raw("test").await?, None); 453 | assert_eq!(tx11.get_raw("a/aaa").await?, None); 454 | // This is missing in tx11, missing in state0, present in JMT 455 | 
assert_eq!(tx11.get_raw("a/aa").await?, Some(b"aa".to_vec())); 456 | assert_eq!(tx11.get_raw("a/z").await?, Some(b"z".to_vec())); 457 | // This is missing in tx10, present in state0, missing in JMT 458 | assert_eq!(tx11.get_raw("a/c").await?, Some(b"c".to_vec())); 459 | let mut range = tx11.prefix_raw("a/"); 460 | assert_eq!( 461 | range.next().await.transpose()?, 462 | Some(("a/aa".to_owned(), b"aa".to_vec())) 463 | ); 464 | assert_eq!( 465 | range.next().await.transpose()?, 466 | Some(("a/ab".to_owned(), b"ab2".to_vec())) 467 | ); 468 | assert_eq!( 469 | range.next().await.transpose()?, 470 | Some(("a/c".to_owned(), b"c".to_vec())) 471 | ); 472 | assert_eq!( 473 | range.next().await.transpose()?, 474 | Some(("a/z".to_owned(), b"z".to_vec())) 475 | ); 476 | assert_eq!(range.next().await.transpose()?, None); 477 | std::mem::drop(range); 478 | // Nonconsensus range checks 479 | let mut range = tx11.nonverifiable_prefix_raw(b"i"); 480 | assert_eq!( 481 | range.next().await.transpose()?, 482 | Some((b"iA".to_vec(), b"A".to_vec())) 483 | ); 484 | assert_eq!( 485 | range.next().await.transpose()?, 486 | Some((b"iB".to_vec(), b"B".to_vec())) 487 | ); 488 | assert_eq!( 489 | range.next().await.transpose()?, 490 | Some((b"iC".to_vec(), b"C".to_vec())) 491 | ); 492 | assert_eq!( 493 | range.next().await.transpose()?, 494 | Some((b"iF".to_vec(), b"F".to_vec())) 495 | ); 496 | assert_eq!(range.next().await.transpose()?, None); 497 | std::mem::drop(range); 498 | 499 | // Apply tx11 to state0 500 | tx11.apply(); 501 | 502 | // Check reads against state0 503 | // This is deleted in state0, present in JMT 504 | assert_eq!(state0.get_raw("test").await?, None); 505 | assert_eq!(state0.get_raw("a/aaa").await?, None); 506 | // This is missing in state0, present in JMT 507 | assert_eq!(state0.get_raw("a/aa").await?, Some(b"aa".to_vec())); 508 | assert_eq!(state0.get_raw("a/z").await?, Some(b"z".to_vec())); 509 | // This is present in state0, missing in JMT 510 | 
assert_eq!(state0.get_raw("a/c").await?, Some(b"c".to_vec())); 511 | // This is present in state0, present in JMT 512 | assert_eq!(state0.get_raw("a/ab").await?, Some(b"ab2".to_vec())); 513 | let mut range = state0.prefix_raw("a/"); 514 | assert_eq!( 515 | range.next().await.transpose()?, 516 | Some(("a/aa".to_owned(), b"aa".to_vec())) 517 | ); 518 | assert_eq!( 519 | range.next().await.transpose()?, 520 | Some(("a/ab".to_owned(), b"ab2".to_vec())) 521 | ); 522 | assert_eq!( 523 | range.next().await.transpose()?, 524 | Some(("a/c".to_owned(), b"c".to_vec())) 525 | ); 526 | assert_eq!( 527 | range.next().await.transpose()?, 528 | Some(("a/z".to_owned(), b"z".to_vec())) 529 | ); 530 | assert_eq!(range.next().await.transpose()?, None); 531 | std::mem::drop(range); 532 | let mut range = state0.nonverifiable_prefix_raw(b"i"); 533 | assert_eq!( 534 | range.next().await.transpose()?, 535 | Some((b"iA".to_vec(), b"A".to_vec())) 536 | ); 537 | assert_eq!( 538 | range.next().await.transpose()?, 539 | Some((b"iB".to_vec(), b"B".to_vec())) 540 | ); 541 | assert_eq!( 542 | range.next().await.transpose()?, 543 | Some((b"iC".to_vec(), b"C".to_vec())) 544 | ); 545 | assert_eq!( 546 | range.next().await.transpose()?, 547 | Some((b"iF".to_vec(), b"F".to_vec())) 548 | ); 549 | assert_eq!(range.next().await.transpose()?, None); 550 | std::mem::drop(range); 551 | 552 | // Create another fork of state 0 while we've edited the first one but before we commit. 553 | let state0a = storage.latest_snapshot(); 554 | assert_eq!(state0a.version(), 0); 555 | 556 | // Commit state0 as state1. 
557 | storage.commit_delta(state0).await?; 558 | 559 | let state1 = storage.latest_snapshot(); 560 | assert_eq!(state1.version(), 1); 561 | 562 | // Check reads against state1 563 | assert_eq!(state1.get_raw("test").await?, None); 564 | assert_eq!(state1.get_raw("a/aaa").await?, None); 565 | assert_eq!(state1.get_raw("a/aa").await?, Some(b"aa".to_vec())); 566 | assert_eq!(state1.get_raw("a/ab").await?, Some(b"ab2".to_vec())); 567 | assert_eq!(state1.get_raw("a/z").await?, Some(b"z".to_vec())); 568 | assert_eq!(state1.get_raw("a/c").await?, Some(b"c".to_vec())); 569 | let mut range = state1.prefix_raw("a/"); 570 | assert_eq!( 571 | range.next().await.transpose()?, 572 | Some(("a/aa".to_owned(), b"aa".to_vec())) 573 | ); 574 | assert_eq!( 575 | range.next().await.transpose()?, 576 | Some(("a/ab".to_owned(), b"ab2".to_vec())) 577 | ); 578 | assert_eq!( 579 | range.next().await.transpose()?, 580 | Some(("a/c".to_owned(), b"c".to_vec())) 581 | ); 582 | assert_eq!( 583 | range.next().await.transpose()?, 584 | Some(("a/z".to_owned(), b"z".to_vec())) 585 | ); 586 | assert_eq!(range.next().await.transpose()?, None); 587 | std::mem::drop(range); 588 | let mut range = state1.nonverifiable_prefix_raw(b"i"); 589 | assert_eq!( 590 | range.next().await.transpose()?, 591 | Some((b"iA".to_vec(), b"A".to_vec())) 592 | ); 593 | assert_eq!( 594 | range.next().await.transpose()?, 595 | Some((b"iB".to_vec(), b"B".to_vec())) 596 | ); 597 | assert_eq!( 598 | range.next().await.transpose()?, 599 | Some((b"iC".to_vec(), b"C".to_vec())) 600 | ); 601 | assert_eq!( 602 | range.next().await.transpose()?, 603 | Some((b"iF".to_vec(), b"F".to_vec())) 604 | ); 605 | assert_eq!(range.next().await.transpose()?, None); 606 | std::mem::drop(range); 607 | 608 | // Check reads against state0a 609 | assert_eq!(state0a.get_raw("test").await?, Some(b"test".to_vec())); 610 | assert_eq!(state0a.get_raw("a/aa").await?, Some(b"aa".to_vec())); 611 | assert_eq!(state0a.get_raw("a/aaa").await?, 
Some(b"aaa".to_vec())); 612 | assert_eq!(state0a.get_raw("a/ab").await?, Some(b"ab".to_vec())); 613 | assert_eq!(state0a.get_raw("a/z").await?, Some(b"z".to_vec())); 614 | assert_eq!(state0a.get_raw("a/c").await?, None); 615 | let mut range = state0a.prefix_raw("a/"); 616 | assert_eq!( 617 | range.next().await.transpose()?, 618 | Some(("a/aa".to_owned(), b"aa".to_vec())) 619 | ); 620 | assert_eq!( 621 | range.next().await.transpose()?, 622 | Some(("a/aaa".to_owned(), b"aaa".to_vec())) 623 | ); 624 | assert_eq!( 625 | range.next().await.transpose()?, 626 | Some(("a/ab".to_owned(), b"ab".to_vec())) 627 | ); 628 | assert_eq!( 629 | range.next().await.transpose()?, 630 | Some(("a/z".to_owned(), b"z".to_vec())) 631 | ); 632 | assert_eq!(range.next().await.transpose()?, None); 633 | std::mem::drop(range); 634 | // Nonconsensus range checks 635 | let mut range = state0a.nonverifiable_prefix_raw(b"i"); 636 | assert_eq!( 637 | range.next().await.transpose()?, 638 | Some((b"iA".to_vec(), b"A".to_vec())) 639 | ); 640 | assert_eq!( 641 | range.next().await.transpose()?, 642 | Some((b"iC".to_vec(), b"C".to_vec())) 643 | ); 644 | assert_eq!( 645 | range.next().await.transpose()?, 646 | Some((b"iD".to_vec(), b"D".to_vec())) 647 | ); 648 | assert_eq!( 649 | range.next().await.transpose()?, 650 | Some((b"iF".to_vec(), b"F".to_vec())) 651 | ); 652 | assert_eq!(range.next().await.transpose()?, None); 653 | std::mem::drop(range); 654 | 655 | // Now, check that closing and reloading works. 656 | 657 | // First, be sure to explicitly drop anything keeping a reference to the 658 | // RocksDB instance: 659 | std::mem::drop(storage); 660 | // std::mem::drop(state0); // consumed in commit() 661 | std::mem::drop(state0a); 662 | std::mem::drop(state1); 663 | 664 | // Now reload the storage from the same directory... 
665 | let storage_a = Storage::load(tmpdir.path().to_owned()).await?; 666 | let state1a = storage_a.latest_snapshot(); 667 | 668 | // Check that we reload at the correct version ... 669 | assert_eq!(state1a.version(), 1); 670 | 671 | // Check reads against state1a after reloading the DB 672 | assert_eq!(state1a.get_raw("test").await?, None); 673 | assert_eq!(state1a.get_raw("a/aaa").await?, None); 674 | assert_eq!(state1a.get_raw("a/aa").await?, Some(b"aa".to_vec())); 675 | assert_eq!(state1a.get_raw("a/ab").await?, Some(b"ab2".to_vec())); 676 | assert_eq!(state1a.get_raw("a/z").await?, Some(b"z".to_vec())); 677 | assert_eq!(state1a.get_raw("a/c").await?, Some(b"c".to_vec())); 678 | let mut range = state1a.prefix_raw("a/"); 679 | assert_eq!( 680 | range.next().await.transpose()?, 681 | Some(("a/aa".to_owned(), b"aa".to_vec())) 682 | ); 683 | assert_eq!( 684 | range.next().await.transpose()?, 685 | Some(("a/ab".to_owned(), b"ab2".to_vec())) 686 | ); 687 | assert_eq!( 688 | range.next().await.transpose()?, 689 | Some(("a/c".to_owned(), b"c".to_vec())) 690 | ); 691 | assert_eq!( 692 | range.next().await.transpose()?, 693 | Some(("a/z".to_owned(), b"z".to_vec())) 694 | ); 695 | assert_eq!(range.next().await.transpose()?, None); 696 | std::mem::drop(range); 697 | // Nonconsensus range checks 698 | let mut range = state1a.nonverifiable_prefix_raw(b"i"); 699 | assert_eq!( 700 | range.next().await.transpose()?, 701 | Some((b"iA".to_vec(), b"A".to_vec())) 702 | ); 703 | assert_eq!( 704 | range.next().await.transpose()?, 705 | Some((b"iB".to_vec(), b"B".to_vec())) 706 | ); 707 | assert_eq!( 708 | range.next().await.transpose()?, 709 | Some((b"iC".to_vec(), b"C".to_vec())) 710 | ); 711 | assert_eq!( 712 | range.next().await.transpose()?, 713 | Some((b"iF".to_vec(), b"F".to_vec())) 714 | ); 715 | assert_eq!(range.next().await.transpose()?, None); 716 | std::mem::drop(range); 717 | 718 | Ok(()) 719 | } 720 | 721 | */ 722 | 
-------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | use anyhow::bail; 2 | /// Splits a range into a tuple of start and end bounds, ignoring the inclusive/exclusive 3 | /// nature of the range bounds. And returns a tuple consisting of the range implementation, 4 | /// and the start and end bounds. 5 | /// # Errors 6 | /// This method returns an error when the range is inclusive on the end bound, 7 | /// and when the lower bound is greater than the upper bound. 8 | #[allow(clippy::type_complexity)] 9 | pub(crate) fn convert_bounds( 10 | range: impl std::ops::RangeBounds>, 11 | ) -> anyhow::Result<( 12 | impl std::ops::RangeBounds>, 13 | (Option>, Option>), 14 | )> { 15 | let start = match range.start_bound() { 16 | std::ops::Bound::Included(v) => Some(v.clone()), 17 | std::ops::Bound::Excluded(v) => Some(v.clone()), 18 | std::ops::Bound::Unbounded => None, 19 | }; 20 | 21 | let end = match range.end_bound() { 22 | std::ops::Bound::Included(_) => bail!("included end bound not supported"), 23 | std::ops::Bound::Excluded(v) => Some(v.clone()), 24 | std::ops::Bound::Unbounded => None, 25 | }; 26 | 27 | if let (Some(k_start), Some(k_end)) = (&start, &end) { 28 | if k_start > k_end { 29 | bail!("lower bound is greater than upper bound") 30 | } 31 | } 32 | 33 | Ok((range, (start, end))) 34 | } 35 | -------------------------------------------------------------------------------- /src/write.rs: -------------------------------------------------------------------------------- 1 | use crate::StateRead; 2 | use std::{any::Any, collections::BTreeMap}; 3 | use tendermint::abci; 4 | 5 | /// Write access to chain state. 6 | pub trait StateWrite: StateRead + Send + Sync { 7 | /// Puts raw bytes into the verifiable key-value store with the given key. 
8 | fn put_raw(&mut self, key: String, value: Vec); 9 | 10 | /// Delete a key from the verifiable key-value store. 11 | fn delete(&mut self, key: String); 12 | 13 | /// Puts raw bytes into the non-verifiable key-value store with the given key. 14 | fn nonverifiable_put_raw(&mut self, key: Vec, value: Vec); 15 | 16 | /// Delete a key from non-verifiable key-value storage. 17 | fn nonverifiable_delete(&mut self, key: Vec); 18 | 19 | /// Puts an object into the ephemeral object store with the given key. 20 | /// 21 | /// # Panics 22 | /// 23 | /// If the object is already present in the store, but its type is not the same as the type of 24 | /// `value`. 25 | fn object_put(&mut self, key: &'static str, value: T); 26 | 27 | /// Deletes a key from the ephemeral object store. 28 | fn object_delete(&mut self, key: &'static str); 29 | 30 | /// Merge a set of object changes into this `StateWrite`. 31 | /// 32 | /// Unlike `object_put`, this avoids re-boxing values and messing up the downcasting. 33 | fn object_merge(&mut self, objects: BTreeMap<&'static str, Option>>); 34 | 35 | /// Record that an ABCI event occurred while building up this set of state changes. 
36 | fn record(&mut self, event: abci::Event); 37 | } 38 | 39 | impl<'a, S: StateWrite + Send + Sync> StateWrite for &'a mut S { 40 | fn put_raw(&mut self, key: String, value: jmt::OwnedValue) { 41 | (**self).put_raw(key, value) 42 | } 43 | 44 | fn delete(&mut self, key: String) { 45 | (**self).delete(key) 46 | } 47 | 48 | fn nonverifiable_delete(&mut self, key: Vec) { 49 | (**self).nonverifiable_delete(key) 50 | } 51 | 52 | fn nonverifiable_put_raw(&mut self, key: Vec, value: Vec) { 53 | (**self).nonverifiable_put_raw(key, value) 54 | } 55 | 56 | fn object_put(&mut self, key: &'static str, value: T) { 57 | (**self).object_put(key, value) 58 | } 59 | 60 | fn object_delete(&mut self, key: &'static str) { 61 | (**self).object_delete(key) 62 | } 63 | 64 | fn object_merge( 65 | &mut self, 66 | objects: BTreeMap<&'static str, Option>>, 67 | ) { 68 | (**self).object_merge(objects) 69 | } 70 | 71 | fn record(&mut self, event: abci::Event) { 72 | (**self).record(event) 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /src/write_batch.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | // HashMap is okay here because we don't care about ordering of substore roots. 4 | use std::collections::HashMap; 5 | 6 | use crate::{ 7 | cache::Cache, 8 | store::{multistore, substore::SubstoreConfig}, 9 | RootHash, 10 | }; 11 | 12 | /// A staged write batch that can be committed to RocksDB. 13 | /// 14 | /// This allows for write batches to be prepared and committed at a later time. 15 | pub struct StagedWriteBatch { 16 | /// The write batch to commit to RocksDB. 17 | pub(crate) write_batch: rocksdb::WriteBatch, 18 | /// The new version of the chain state. 19 | pub(crate) version: jmt::Version, 20 | /// The new versions of each substore. 
21 | pub(crate) multistore_versions: multistore::MultistoreCache, 22 | /// The root hash of the chain state corresponding to this set of changes. 23 | pub(crate) root_hash: RootHash, 24 | /// The configs, root hashes, and new versions of each substore 25 | /// that was updated in this batch. 26 | #[allow(clippy::disallowed_types)] 27 | pub(crate) substore_roots: HashMap, (RootHash, u64)>, 28 | /// Whether or not to perform a migration. 29 | pub(crate) perform_migration: bool, 30 | /// A lightweight copy of the changeset, this is useful to provide 31 | /// a stream of changes to subscribers. 32 | pub(crate) changes: Arc, 33 | } 34 | 35 | impl StagedWriteBatch { 36 | /// Returns the new version of the chain state corresponding to this set of changes. 37 | pub fn version(&self) -> jmt::Version { 38 | self.version 39 | } 40 | 41 | /// Returns the root hash of the jmt corresponding to this set of changes. 42 | pub fn root_hash(&self) -> &RootHash { 43 | &self.root_hash 44 | } 45 | 46 | /// Returns the version of a substore in this batch, if it exists 47 | /// and `None` otherwise. 48 | pub fn substore_version(&self, prefix: &str) -> Option { 49 | let Some(substore_config) = self 50 | .multistore_versions 51 | .config 52 | .find_substore(prefix.as_bytes()) 53 | else { 54 | return None; 55 | }; 56 | 57 | self.multistore_versions.get_version(&substore_config) 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /tests/write_batch.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Result; 2 | use cnidarium::{StateDelta, StateWrite, Storage}; 3 | use tempfile; 4 | use tokio; 5 | 6 | #[tokio::test] 7 | /// A simple test that checks that we cannot commit a stale batch to storage. 8 | /// Strategy: 9 | /// Create three state deltas, one that writes to every substore, and two others 10 | /// that target specific substores or none at all. 
11 | pub async fn test_write_batch_stale_version_substores() -> Result<()> { 12 | let _ = tracing_subscriber::fmt::try_init(); 13 | let tmpdir = tempfile::tempdir()?; 14 | let db_path = tmpdir.into_path(); 15 | let substore_prefixes = vec![ 16 | "ibc".to_string(), 17 | "dex".to_string(), 18 | "misc".to_string(), 19 | "cometbft-data".to_string(), 20 | ]; 21 | let storage = Storage::load(db_path.clone(), substore_prefixes.clone()).await?; 22 | let initial_snapshot = storage.latest_snapshot(); 23 | let initial_version = initial_snapshot.version(); 24 | let initial_root_hash = initial_snapshot.root_hash().await?; 25 | assert_eq!( 26 | initial_version, 27 | u64::MAX, 28 | "initial version should be u64::MAX" 29 | ); 30 | assert_eq!(initial_root_hash.0, [0u8; 32]); 31 | 32 | /* ************************ Prepare three deltas ************************** */ 33 | // Our goal is to check that we can't commit a batch with a stale version. 34 | // We create three deltas: 35 | // 1. Empty delta 36 | // 2. Delta that writes to one substore 37 | // 3. Delta that writes to each substore and also writes to the main store 38 | 39 | /* We create an empty delta that writes no keys. */ 40 | let delta_1 = StateDelta::new(initial_snapshot); 41 | let write_batch_1 = storage.prepare_commit(delta_1).await?; 42 | let version_1 = write_batch_1.version(); 43 | let root_hash_1 = write_batch_1.root_hash().clone(); 44 | assert_eq!(version_1, initial_version.wrapping_add(1)); 45 | assert_ne!(root_hash_1.0, initial_root_hash.0); 46 | 47 | // We check that merely preparing a batch does not write anything. 48 | let state_snapshot = storage.latest_snapshot(); 49 | assert_eq!(state_snapshot.version(), initial_version); 50 | assert_eq!(state_snapshot.root_hash().await?.0, initial_root_hash.0); 51 | for prefix in substore_prefixes.iter() { 52 | // We didn't write to any substores, so their version should be unchanged. 
53 | assert_eq!( 54 | write_batch_1 55 | .substore_version(prefix) 56 | .expect("substore exists"), 57 | u64::MAX 58 | ) 59 | } 60 | 61 | /* We create a new delta that writes to a single substore. */ 62 | let mut delta_2 = StateDelta::new(state_snapshot.clone()); 63 | delta_2.put_raw("ibc/key".to_string(), [1u8; 32].to_vec()); 64 | let write_batch_2 = storage.prepare_commit(delta_2).await?; 65 | let version_2 = write_batch_2.version(); 66 | let root_hash_2 = write_batch_2.root_hash(); 67 | assert_eq!(version_2, initial_version.wrapping_add(1)); 68 | assert_ne!(root_hash_2.0, initial_root_hash.0); 69 | 70 | // Now, we check that the version for the main store is incremented, and 71 | // only the version for the ibc substore is incremented. 72 | assert_eq!(write_batch_2.version(), initial_version.wrapping_add(1)); 73 | assert_eq!( 74 | write_batch_2 75 | .substore_version("ibc") 76 | .expect("substore_exists"), 77 | initial_version.wrapping_add(1) 78 | ); 79 | for prefix in substore_prefixes.iter().filter(|p| *p != "ibc") { 80 | assert_eq!( 81 | write_batch_2 82 | .substore_version(prefix) 83 | .expect("substore exists"), 84 | u64::MAX 85 | ) 86 | } 87 | 88 | /* We create a new delta that writes to each substore. */ 89 | let mut delta_3 = StateDelta::new(state_snapshot); 90 | for substore_prefix in substore_prefixes.iter() { 91 | let key = format!("{}/key", substore_prefix); 92 | tracing::debug!(?key, "adding to delta_1"); 93 | delta_3.put_raw(key, [1u8; 32].to_vec()); 94 | } 95 | let write_batch_3 = storage.prepare_commit(delta_3).await?; 96 | let version_3 = write_batch_3.version(); 97 | let root_hash_3 = write_batch_3.root_hash().clone(); 98 | 99 | // Once again, we check that we incremented the main store version. 100 | assert_eq!(version_3, initial_version.wrapping_add(1)); 101 | assert_ne!(root_hash_3.0, initial_root_hash.0); 102 | // In addition to that, we check that we incremented the version of each substore. 
103 | for prefix in substore_prefixes.iter() { 104 | assert_eq!( 105 | write_batch_3 106 | .substore_version(prefix) 107 | .expect("substore exists"), 108 | initial_version.wrapping_add(1) 109 | ) 110 | } 111 | 112 | /* Persist `write_batch_1` and check that the two other (stale) deltas cannot be applied. */ 113 | let final_root = storage 114 | .commit_batch(write_batch_1) 115 | .expect("committing batch 3 should work"); 116 | let final_snapshot = storage.latest_snapshot(); 117 | assert_eq!(root_hash_1.0, final_root.0); 118 | assert_eq!(root_hash_1.0, final_snapshot.root_hash().await?.0); 119 | assert_eq!(version_1, final_snapshot.version()); 120 | assert!( 121 | storage.commit_batch(write_batch_2).is_err(), 122 | "committing batch 2 should fail" 123 | ); 124 | assert!( 125 | storage.commit_batch(write_batch_3).is_err(), 126 | "committing batch 3 should fail" 127 | ); 128 | 129 | Ok(()) 130 | } 131 | 132 | #[tokio::test] 133 | /// Test that we can commit a batch without incrementing the substore versions if there are no 134 | /// keys to write. 
135 | pub async fn test_two_empty_writes() -> Result<()> { 136 | let _ = tracing_subscriber::fmt::try_init(); 137 | let tmpdir = tempfile::tempdir()?; 138 | let db_path = tmpdir.into_path(); 139 | let substore_prefixes = vec![ 140 | "ibc".to_string(), 141 | "dex".to_string(), 142 | "misc".to_string(), 143 | "cometbft-data".to_string(), 144 | ]; 145 | let storage = Storage::load(db_path.clone(), substore_prefixes.clone()).await?; 146 | let initial_snapshot = storage.latest_snapshot(); 147 | let initial_version = initial_snapshot.version(); 148 | let initial_root_hash = initial_snapshot.root_hash().await?; 149 | assert_eq!( 150 | initial_version, 151 | u64::MAX, 152 | "initial version should be u64::MAX" 153 | ); 154 | assert_eq!(initial_root_hash.0, [0u8; 32]); 155 | 156 | let mut delta_1 = StateDelta::new(initial_snapshot); 157 | for substore_prefix in substore_prefixes.iter() { 158 | let key = format!("{}/key", substore_prefix); 159 | tracing::debug!(?key, "adding to delta_1"); 160 | delta_1.put_raw(key, [1u8; 12].to_vec()); 161 | } 162 | let write_batch_1 = storage.prepare_commit(delta_1).await?; 163 | let version_1 = write_batch_1.version(); 164 | let root_hash_1 = write_batch_1.root_hash().clone(); 165 | 166 | assert_eq!(version_1, initial_version.wrapping_add(1)); 167 | assert_ne!(root_hash_1.0, initial_root_hash.0); 168 | for prefix in substore_prefixes.iter() { 169 | assert_eq!( 170 | write_batch_1 171 | .substore_version(prefix) 172 | .expect("substore exists"), 173 | initial_version.wrapping_add(1) 174 | ) 175 | } 176 | 177 | // We check that merely preparing a batch does not write anything. 
178 | let state_snapshot = storage.latest_snapshot(); 179 | assert_eq!(state_snapshot.version(), initial_version); 180 | assert_eq!(state_snapshot.root_hash().await?.0, initial_root_hash.0); 181 | 182 | /* We create a new delta that writes no keys */ 183 | let delta_2 = StateDelta::new(state_snapshot.clone()); 184 | let write_batch_2 = storage.prepare_commit(delta_2).await?; 185 | let version_2 = write_batch_2.version(); 186 | let root_hash_2 = write_batch_2.root_hash(); 187 | assert_eq!(version_2, initial_version.wrapping_add(1)); 188 | assert_ne!(root_hash_2.0, initial_root_hash.0); 189 | assert_eq!(write_batch_2.version(), initial_version.wrapping_add(1)); 190 | for prefix in substore_prefixes.iter() { 191 | assert_eq!( 192 | write_batch_2 193 | .substore_version(prefix) 194 | .expect("substore exists"), 195 | initial_version 196 | ) 197 | } 198 | 199 | let block_1_root = storage 200 | .commit_batch(write_batch_1) 201 | .expect("committing batch 3 should work"); 202 | let block_1_snapshot = storage.latest_snapshot(); 203 | let block_1_version = block_1_snapshot.version(); 204 | assert_eq!(root_hash_1.0, block_1_root.0); 205 | assert_eq!(root_hash_1.0, block_1_snapshot.root_hash().await?.0); 206 | assert_eq!(version_1, block_1_version); 207 | assert!( 208 | storage.commit_batch(write_batch_2).is_err(), 209 | "committing batch 2 should fail" 210 | ); 211 | 212 | /* We create an empty delta that writes no keys. 
*/ 213 | let delta_3 = StateDelta::new(block_1_snapshot); 214 | let write_batch_3 = storage.prepare_commit(delta_3).await?; 215 | let version_3 = write_batch_3.version(); 216 | let root_hash_3 = write_batch_3.root_hash().clone(); 217 | assert_eq!(version_3, block_1_version.wrapping_add(1)); 218 | 219 | /* Check that we can apply `write_batch_3` */ 220 | let block_2_root = storage 221 | .commit_batch(write_batch_3) 222 | .expect("committing batch 3 should work"); 223 | let block_2_snapshot = storage.latest_snapshot(); 224 | let block_2_version = block_2_snapshot.version(); 225 | assert_eq!(root_hash_3.0, block_2_root.0); 226 | assert_eq!(root_hash_3.0, block_2_snapshot.root_hash().await?.0); 227 | assert_eq!(version_3, block_2_version); 228 | Ok(()) 229 | } 230 | 231 | #[tokio::test] 232 | /// Test that we can write prepare-commit batches that write to every 233 | /// substore. 234 | /// Intuition: we want to make sure that the version check that guards us from 235 | /// writing stale batches, is working as expected. 
236 | pub async fn test_batch_substore() -> Result<()> { 237 | let _ = tracing_subscriber::fmt::try_init(); 238 | let tmpdir = tempfile::tempdir()?; 239 | let db_path = tmpdir.into_path(); 240 | let substore_prefixes = vec![ 241 | "ibc".to_string(), 242 | "dex".to_string(), 243 | "misc".to_string(), 244 | "cometbft-data".to_string(), 245 | ]; 246 | let storage = Storage::load(db_path.clone(), substore_prefixes.clone()).await?; 247 | let initial_snapshot = storage.latest_snapshot(); 248 | let initial_version = initial_snapshot.version(); 249 | let initial_root_hash = initial_snapshot.root_hash().await?; 250 | assert_eq!( 251 | initial_version, 252 | u64::MAX, 253 | "initial version should be u64::MAX" 254 | ); 255 | assert_eq!(initial_root_hash.0, [0u8; 32]); 256 | 257 | for i in 0..100 { 258 | let snapshot = storage.latest_snapshot(); 259 | let prev_version = snapshot.version(); 260 | let prev_root = snapshot 261 | .root_hash() 262 | .await 263 | .expect("a root hash is available"); 264 | 265 | let mut delta = StateDelta::new(snapshot); 266 | for substore_prefix in substore_prefixes.iter() { 267 | let key = format!("{}/key_{i}", substore_prefix); 268 | tracing::debug!(?key, index = i, "adding to delta"); 269 | delta.put_raw(key, [1u8; 12].to_vec()); 270 | } 271 | let write_batch = storage.prepare_commit(delta).await?; 272 | let next_version = write_batch.version(); 273 | let next_root = write_batch.root_hash().clone(); 274 | 275 | assert_eq!(next_version, prev_version.wrapping_add(1)); 276 | assert_ne!(next_root.0, prev_root.0); 277 | for prefix in substore_prefixes.iter() { 278 | assert_eq!( 279 | write_batch 280 | .substore_version(prefix) 281 | .expect("substore exists"), 282 | prev_version.wrapping_add(1) 283 | ) 284 | } 285 | 286 | // We check that merely preparing a batch does not write anything. 
287 | let state_snapshot = storage.latest_snapshot(); 288 | assert_eq!(state_snapshot.version(), prev_version); 289 | assert_eq!(state_snapshot.root_hash().await?.0, prev_root.0); 290 | 291 | let block_root = storage 292 | .commit_batch(write_batch) 293 | .expect("committing batch 3 should work"); 294 | let block_snapshot = storage.latest_snapshot(); 295 | let block_version = block_snapshot.version(); 296 | assert_eq!(next_root.0, block_root.0); 297 | assert_eq!(next_root.0, block_snapshot.root_hash().await?.0); 298 | assert_eq!(next_version, block_version); 299 | } 300 | 301 | Ok(()) 302 | } 303 | -------------------------------------------------------------------------------- /tools/proto-compiler/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "penumbra-proto-compiler" 3 | version = "0.1.0" 4 | edition = "2021" 5 | publish = false 6 | 7 | [dependencies] 8 | anyhow = "1" 9 | ibc-proto = { version = "0.51.1" } 10 | ics23 = "0.12.0" 11 | pbjson = "0.7" 12 | pbjson-build = "0.7" 13 | pbjson-types = "0.7" 14 | prost = "0.13.3" 15 | prost-build = "0.13.3" 16 | prost-types = "0.13.3" 17 | tempfile = "3" 18 | tonic-build = { version = "0.13.0", features = ["cleanup-markdown"] } 19 | -------------------------------------------------------------------------------- /tools/proto-compiler/README.md: -------------------------------------------------------------------------------- 1 | ## How to compile fresh proto structs 2 | 3 | * `cargo run` in the compiler folder. 4 | 5 | The resultant structs will be created in the `src/gen` folder. 
--------------------------------------------------------------------------------
/tools/proto-compiler/src/main.rs:
--------------------------------------------------------------------------------
use std::path::PathBuf;

/// Compiles the cnidarium protobuf definitions into Rust sources under
/// `src/gen`, emitting a file descriptor set and pbjson (De)Serialize impls.
///
/// Paths are resolved relative to this crate's manifest directory, so this
/// binary must be run from within the proto-compiler tool crate (`cargo run`).
fn main() -> anyhow::Result<()> {
    let root = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    println!("root: {}", root.display());

    // Generated code lands in the main crate's `src/gen`, two levels up.
    let target_dir = root.join("..").join("..").join("src").join("gen");
    println!("target_dir: {}", target_dir.display());

    // https://github.com/penumbra-zone/penumbra/issues/3038#issuecomment-1722534133
    // Using the "no_lfs" suffix prevents matching a catch-all LFS rule.
    let descriptor_file_name = "proto_descriptor.bin.no_lfs";

    // prost_build::Config isn't Clone, so it is constructed once here and
    // handed by value to `compile_protos_with_config` below.
    // NOTE(review): a previous comment said "we need to make two", but only
    // one config is constructed in this file.
    let mut config = prost_build::Config::new();

    config.compile_well_known_types();
    // As recommended in pbjson_types docs.
    config.extern_path(".google.protobuf", "::pbjson_types");
    // NOTE: we need this because the rust module that defines the IBC types is external, and not
    // part of this crate.
    // See https://docs.rs/prost-build/0.5.0/prost_build/struct.Config.html#method.extern_path
    config.extern_path(".ibc", "::ibc_proto::ibc");
    // TODO: which of these is the right path?
    config.extern_path(".ics23", "::ics23");
    config.extern_path(".cosmos.ics23", "::ics23");

    config
        .out_dir(&target_dir)
        .file_descriptor_set_path(&target_dir.join(descriptor_file_name))
        .enable_type_names();

    // Gate generated client/server modules behind the `rpc` feature.
    let rpc_doc_attr = r#"#[cfg(feature = "rpc")]"#;

    tonic_build::configure()
        .out_dir(&target_dir)
        .emit_rerun_if_changed(false)
        .server_mod_attribute(".", rpc_doc_attr)
        .client_mod_attribute(".", rpc_doc_attr)
        .compile_protos_with_config(
            config,
            &["../../proto/penumbra/penumbra/cnidarium/v1/cnidarium.proto"],
            &["../../proto/penumbra/", "../../proto/rust-vendored/"],
        )?;
    // Finally, build pbjson Serialize, Deserialize impls:
    let descriptor_set = std::fs::read(target_dir.join(descriptor_file_name))?;

    pbjson_build::Builder::new()
        .register_descriptors(&descriptor_set)?
        .ignore_unknown_fields()
        .out_dir(&target_dir)
        .build(&[".penumbra"])?;

    Ok(())
}
--------------------------------------------------------------------------------