├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── analysis ├── Cargo.toml └── src │ ├── accounts.rs │ └── main.rs └── preimages ├── Cargo.toml └── src ├── cmds.rs ├── iterators ├── eip7748.rs ├── mod.rs └── plain.rs ├── main.rs └── progress.rs /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | debug/ 4 | target/ 5 | 6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 8 | Cargo.lock 9 | 10 | # These are backup files generated by rustfmt 11 | **/*.rs.bk 12 | 13 | # MSVC Windows builds of rustc generate these, which store debugging information 14 | *.pdb 15 | 16 | # RustRover 17 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 18 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 19 | # and can be added to the global gitignore or merged into this file. For a more nuclear 20 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
21 | #.idea/ 22 | 23 | *.bin -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["analysis", "preimages"] 3 | resolver = "2" 4 | 5 | [workspace.dependencies] 6 | reth-db = { git = "https://github.com/paradigmxyz/reth.git", tag = "v1.1.5" } 7 | reth-db-api = { git = "https://github.com/paradigmxyz/reth.git", tag = "v1.1.5" } 8 | reth-chainspec = { git = "https://github.com/paradigmxyz/reth.git", tag = "v1.1.5" } 9 | reth-provider = { git = "https://github.com/paradigmxyz/reth.git", tag = "v1.1.5" } 10 | reth-node-types = { git = "https://github.com/paradigmxyz/reth.git", tag = "v1.1.5" } 11 | reth-node-ethereum = { git = "https://github.com/paradigmxyz/reth.git", tag = "v1.1.5" } 12 | reth-stages = { git = "https://github.com/paradigmxyz/reth.git", tag = "v1.1.5" } 13 | alloy-primitives = "0.8.15" 14 | anyhow = "1.0.40" 15 | alloy-chains = "=0.1.55" 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Ignacio Hagopian 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # eth-stateless 2 | 3 | A toolbox for various Ethereum stateless tasks, such as: 4 | 5 | - Merkle Patricia Trie preimages exporter, verifier and frequency analysis. 6 | 7 | ## Prerequisites 8 | 9 | - Rust toolchain (stable) 10 | - Cargo 11 | - `--datadir` folder of a synced full-node Reth (i.e. an archive node is _not_ required) 12 | 13 | ## Preimages 14 | 15 | ```text 16 | Usage: preimages --datadir <DATADIR> <COMMAND> 17 | 18 | Commands: 19 | generate Generate preimage file 20 | verify Verify preimage file 21 | storage-slot-freq Analyze storage-slot 29-byte prefix frequency and size impact 22 | help Print this message or the help of the given subcommand(s) 23 | 24 | Options: 25 | -d, --datadir <DATADIR> Reth datadir path 26 | -h, --help Print help 27 | ``` 28 | 29 | ### Commands 30 | 31 | The tool provides three subcommands for preimages: 32 | 33 | - `generate`: Generate preimage file 34 | - `verify`: Verify preimage file 35 | - `storage-slot-freq`: Analyze the frequency of the 29-byte prefix of storage slots and its size impact 36 | 37 | For the `generate` and `verify` commands, two ordering modes are supported: 38 | 39 | - `--plain`: Use plain ordering (i.e. unhashed) 40 | - `--eip7748`: Use EIP-7748 ordering (i.e. 
trie(s) DFS (hashed)) 41 | 42 | ### Generate 43 | 44 | ```text 45 | $ cargo run -p preimages -- generate --help 46 | Generate preimage file 47 | 48 | Usage: preimages --datadir <DATADIR> generate [OPTIONS] <--plain|--eip7748> 49 | 50 | Options: 51 | --output-path <PATH> Preimages file output path [default: preimages.bin] 52 | --plain Use plain ordering 53 | --eip7748 Use EIP-7748 ordering (i.e: hashed) 54 | -h, --help Print help 55 | ``` 56 | 57 | Examples: 58 | 59 | ```text 60 | $ cargo run -p preimages --release -- --datadir=<DATADIR> generate --plain 61 | Database block number: 21547467 62 | [1/1] Generating preimage file... 63 | #####>-------------------------------------------- 10% [eta: 32m] 0x19eaf81a0c1215b7e50524f42594d9496e0ec640 64 | ``` 65 | 66 | ```text 67 | $ cargo run -p preimages --release -- --datadir=<DATADIR> generate --eip7748 68 | Database block number: 21547467 69 | [1/2] Ordering account addresses by hash... 70 | #################################################> 100% [eta: 0s] fffec5f54c839fc4a744bebaede23b6e4904007c 71 | [2/2] Generating preimage file... 
72 | #####>-------------------------------------------- 11% [eta: 49m] 1cb3c5ece6021f2d9bf63ba877f8dfc717db509ed66431bebb90c60fedb551ba 73 | ``` 74 | 75 | ### Verify 76 | 77 | ```text 78 | Verify preimage file 79 | 80 | Usage: preimages --datadir <DATADIR> verify --path <PATH> <--plain|--eip7748> 81 | 82 | Options: 83 | --path <PATH> Preimages file path to verify 84 | --plain Use plain ordering 85 | --eip7748 Use EIP-7748 ordering (i.e: hashed) 86 | -h, --help Print help 87 | ``` 88 | 89 | Example verifying a generated `--eip7748` preimage file: 90 | 91 | ```text 92 | $ cargo run -p preimages --release -- --datadir=/fast/reth/reth_data verify --path preimages.bin --eip7748 93 | Database block number: 21547467 94 | #>------------------------------------------------ 2% [eta: 54m] 063f6a4b1968bd386869d8f9083e6d5b9525ccf980ab4d4d8d42d824dccaf1ab 95 | ``` 96 | 97 | Verifying the same file with `--plain` fails, because the expected ordering is different: 98 | 99 | ```text 100 | $ cargo run -p preimages --release -- --datadir=/fast/reth/reth_data verify --path preimages.bin --plain 101 | Database block number: 21547467 102 | [1/2] Verifying provided preimage file... 
103 | Error: Address 0xEA46927B4Fc92248d052299FBFCC6778421930C6 preimage mismatch 104 | ``` 105 | 106 | ### Storage slots 29-byte prefix frequency and size impact analysis 107 | 108 | ```text 109 | $ cargo run -p preimages --release -- --datadir=/fast/reth/reth_data storage-slot-freq --help 110 | Analyze storage-slot 29-byte prefix frequency and size impact 111 | 112 | Usage: preimages --datadir <DATADIR> storage-slot-freq 113 | 114 | Options: 115 | -h, --help Print help 116 | ``` 117 | 118 | Example: 119 | 120 | ```text 121 | $ cargo run -p preimages --release -- --datadir=/fast/reth/reth_data storage-slot-freq 122 | Database block number: 21547467 123 | #################################################> 100% [eta: 0s] fffffffff15abf397da76f1dcc1a1604f45126db 124 | Top 25 storage slot 29-byte prefix repetitions: 125 | 0000000000000000000000000000000000000000000000000000000000: 56944638 (4.65%) ~1574MiB (cumm 1574MiB) 126 | f3f7a9fe364faab93b216da50a3214154f22a0a2b415b23a84c8169e8b: 13665589 (1.12%) ~377MiB (cumm 1952MiB) 127 | 8a35acfbc15ff81a39ae7d344fd709f28e8600b4aa8c65c6b64bfe7fe3: 9425916 (0.77%) ~260MiB (cumm 2213MiB) 128 | f652222313e28459528d920b65115c16c04f3efc82aaedc97be59f3f37: 8546483 (0.70%) ~236MiB (cumm 2449MiB) 129 | 405787fa12a823e0f2b7631cc41b3ba8828b3321ca811111fa75cd3aa3: 7701011 (0.63%) ~212MiB (cumm 2662MiB) 130 | a66cc928b5edb82af9bd49922954155ab7b0942694bea4ce44661d9a87: 3509056 (0.29%) ~97MiB (cumm 2759MiB) 131 | ... 132 | ``` 133 | 134 | ## LICENSE 135 | 136 | MIT. 
137 | -------------------------------------------------------------------------------- /analysis/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "analysis" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | [dependencies] 7 | reth-db.workspace = true 8 | reth-chainspec.workspace = true 9 | reth-provider.workspace = true 10 | reth-node-types.workspace = true 11 | reth-stages.workspace = true 12 | reth-node-ethereum.workspace = true 13 | reth-db-api.workspace = true 14 | anyhow.workspace = true 15 | alloy-primitives.workspace = true 16 | clap = { version = "4.5.30", features = ["derive"] } 17 | indicatif = "0.17.11" 18 | serde = "1.0.217" 19 | serde_json = "1.0.138" 20 | tabled = { version = "0.18.0" } 21 | -------------------------------------------------------------------------------- /analysis/src/accounts.rs: -------------------------------------------------------------------------------- 1 | use std::sync::LazyLock; 2 | 3 | use alloy_primitives::{Address, B256, U256}; 4 | use anyhow::Result; 5 | use indicatif::{ProgressBar, ProgressStyle}; 6 | use reth_db::mdbx::tx::Tx; 7 | use reth_db::mdbx::RO; 8 | use reth_db::{Bytecodes, PlainAccountState, PlainStorageState}; 9 | use reth_db_api::cursor::DbCursorRO; 10 | use reth_db_api::transaction::DbTx; 11 | use serde::{Deserialize, Serialize}; 12 | use std::cmp::min; 13 | 14 | static PROGRESS_STYLE: LazyLock = LazyLock::new(|| { 15 | ProgressStyle::with_template("{bar:50.cyan/blue} {percent}% [eta: {eta}] {msg}") 16 | .expect("Failed to set progress bar style template") 17 | .progress_chars("#>-") 18 | }); 19 | 20 | #[derive(Debug, Serialize, Deserialize)] 21 | pub struct AccountStemStats { 22 | pub address: Address, 23 | pub bytecode_len: usize, 24 | pub account_stem: u16, 25 | pub ss_stems: Vec, 26 | pub code_stems: u16, 27 | pub num_storage_slots: usize, 28 | } 29 | 30 | pub fn account_stats(tx: &Tx, group_size: u16) -> Result> { 31 | let bar = 
ProgressBar::new(tx.entries::()? as u64) 32 | .with_style(PROGRESS_STYLE.clone()) 33 | .with_message("Analyzing..."); 34 | 35 | let header_storage_offset = 64; 36 | let code_offset = group_size / 2; 37 | let ss_header_count = to_b256(code_offset - header_storage_offset); 38 | let group_size_bits = group_size.trailing_zeros(); 39 | 40 | let mut accounts = Vec::::new(); 41 | let mut cur = tx.cursor_read::()?; 42 | loop { 43 | match cur.next() { 44 | Ok(Some((address, _))) => { 45 | bar.set_message(address.to_string().to_lowercase()); 46 | let account = tx.get::(address)?.unwrap(); 47 | let bytecode = tx 48 | .get::(account.get_bytecode_hash())? 49 | .unwrap_or_default(); 50 | let code_chunks_count = ((bytecode.len() + 30) / 31) as u16; 51 | let code_chunks_in_header = min(group_size - code_offset, code_chunks_count); 52 | 53 | let mut stats = AccountStemStats { 54 | address, 55 | bytecode_len: bytecode.len(), 56 | account_stem: 1 + 1 + code_chunks_in_header, // BASIC_DATA + CODE_HASH + header_code_chunks 57 | ss_stems: vec![], 58 | code_stems: (code_chunks_count - code_chunks_in_header).div_ceil(group_size), 59 | num_storage_slots: 0, 60 | }; 61 | 62 | let mut cur = tx.cursor_read::()?; 63 | let mut entry = cur.seek_exact(address)?; 64 | let mut curr_ss_group = U256::default(); 65 | while let Some((slot_address, slot)) = entry { 66 | if slot_address != address { 67 | break; 68 | } 69 | stats.num_storage_slots += 1; 70 | if slot.key < ss_header_count { 71 | stats.account_stem += 1; 72 | } else { 73 | let (mut ss_group, _) = U256::from_be_slice(slot.key.as_slice()) 74 | .overflowing_shr(group_size_bits as usize); 75 | ss_group = ss_group.checked_add(U256::from(1)).unwrap(); 76 | 77 | if ss_group != curr_ss_group { 78 | curr_ss_group = ss_group; 79 | stats.ss_stems.push(1); 80 | } else { 81 | *stats.ss_stems.last_mut().unwrap() += 1; 82 | } 83 | } 84 | entry = cur.next()?; 85 | } 86 | accounts.push(stats); 87 | } 88 | Ok(None) => { 89 | break; 90 | } 91 | Err(e) => { 92 
| return Err(e.into()); 93 | } 94 | } 95 | bar.inc(1); 96 | } 97 | bar.finish_and_clear(); 98 | 99 | Ok(accounts) 100 | } 101 | 102 | fn to_b256(value: u16) -> B256 { 103 | let mut buf = [0u8; 32]; 104 | buf[30..].copy_from_slice(&value.to_be_bytes()); 105 | B256::new(buf) 106 | } 107 | -------------------------------------------------------------------------------- /analysis/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Result}; 2 | use clap::Parser; 3 | use reth_chainspec::ChainSpecBuilder; 4 | use reth_db::{ 5 | mdbx::{tx::Tx, DatabaseArguments, MaxReadTransactionDuration, RO}, 6 | DatabaseEnv, 7 | }; 8 | use reth_node_ethereum::EthereumNode; 9 | use reth_node_types::NodeTypesWithDBAdapter; 10 | use reth_provider::{providers::StaticFileProvider, ProviderFactory, StageCheckpointReader}; 11 | use reth_stages::StageId; 12 | use std::{path::Path, sync::Arc}; 13 | use tabled::{settings::Panel, Table, Tabled}; 14 | 15 | mod accounts; 16 | 17 | #[derive(Parser)] 18 | #[command(name = "report")] 19 | struct Cli { 20 | #[arg(short = 'd', long = "datadir", help = "Reth datadir path")] 21 | datadir: String, 22 | 23 | #[command(subcommand)] 24 | subcmd: SubCommand, 25 | } 26 | 27 | #[derive(Parser)] 28 | enum SubCommand { 29 | #[command(name = "accounts-stats", about = "Generate account stats report")] 30 | AccountsStats, 31 | } 32 | 33 | fn main() -> Result<()> { 34 | let cli = Cli::parse(); 35 | 36 | let db_path = Path::new(&cli.datadir).join("db"); 37 | let db = reth_db::open_db_read_only( 38 | db_path.as_ref(), 39 | DatabaseArguments::default() 40 | .with_max_read_transaction_duration(Some(MaxReadTransactionDuration::Unbounded)), 41 | ) 42 | .map_err(|err| anyhow!(err))?; 43 | 44 | let spec = ChainSpecBuilder::mainnet().build(); 45 | let factory = ProviderFactory::>>::new( 46 | db.into(), 47 | spec.into(), 48 | StaticFileProvider::read_only(db_path.join("static_files"), true)?, 49 | ); 50 | let 
provider = factory.provider()?; 51 | 52 | let latest_block_number = provider 53 | .get_stage_checkpoint(StageId::Finish)? 54 | .map(|ch| ch.block_number) 55 | .ok_or(anyhow!("No finish checkpoint"))?; 56 | println!("Database block number: {:?}", latest_block_number); 57 | 58 | let tx = provider.into_tx(); 59 | 60 | match cli.subcmd { 61 | SubCommand::AccountsStats => account_stats(tx)?, 62 | } 63 | 64 | Ok(()) 65 | } 66 | 67 | fn account_stats(tx: Tx) -> Result<()> { 68 | let stats = accounts::account_stats(&tx, 256)?; 69 | { 70 | #[derive(Tabled)] 71 | struct AccountCounts { 72 | eoas: usize, 73 | contracts: usize, 74 | total: usize, 75 | } 76 | let eoa_count = stats.iter().filter(|a| a.bytecode_len == 0).count(); 77 | let table = Table::new(vec![AccountCounts { 78 | eoas: eoa_count, 79 | contracts: stats.len() - eoa_count, 80 | total: stats.len(), 81 | }]) 82 | .with(Panel::header("Accounts")) 83 | .to_string(); 84 | 85 | println!("{}\n", table); 86 | } 87 | 88 | { 89 | let mut code_lens: Vec = stats 90 | .iter() 91 | .filter(|a| a.bytecode_len > 0) 92 | .map(|a| a.bytecode_len as u64) 93 | .collect(); 94 | let table = Table::new(vec![calculate_stats(&mut code_lens)]) 95 | .with(Panel::header("Code length")) 96 | .to_string(); 97 | 98 | println!("{}\n", table); 99 | } 100 | 101 | { 102 | let mut num_storage_slots: Vec = stats 103 | .iter() 104 | .filter(|a| a.bytecode_len > 0) 105 | .map(|a| a.num_storage_slots as u64) 106 | .collect(); 107 | let table = Table::new(vec![calculate_stats(&mut num_storage_slots)]) 108 | .with(Panel::header("Contract storage slots count")) 109 | .to_string(); 110 | 111 | println!("{}\n", table); 112 | } 113 | 114 | { 115 | let total_stems = stats 116 | .iter() 117 | .map(|a| 1 + a.ss_stems.len() + a.code_stems as usize) 118 | .sum::(); 119 | 120 | #[derive(Tabled)] 121 | struct StemCountRow { 122 | name: &'static str, 123 | total: u64, 124 | #[tabled(rename = "%", format = "{:.2}%")] 125 | percentage: f64, 126 | } 127 | let 
contract_header_stems = stats.len() as u64; 128 | let storage_slots_stems = stats.iter().map(|a| a.ss_stems.len() as u64).sum(); 129 | let code_chunks_stems = stats.iter().map(|a| a.code_stems as u64).sum(); 130 | let table = Table::new([ 131 | StemCountRow { 132 | name: "Accounts header stems", 133 | total: contract_header_stems, 134 | percentage: contract_header_stems as f64 / total_stems as f64 * 100.0, 135 | }, 136 | StemCountRow { 137 | name: "Storage-slots stems", 138 | total: storage_slots_stems, 139 | percentage: storage_slots_stems as f64 / total_stems as f64 * 100.0, 140 | }, 141 | StemCountRow { 142 | name: "Code-chunks stems", 143 | total: code_chunks_stems, 144 | percentage: code_chunks_stems as f64 / total_stems as f64 * 100.0, 145 | }, 146 | ]) 147 | .with(Panel::header("Stems type counts")) 148 | .with(Panel::footer(format!("Total = {}", total_stems))) 149 | .to_string(); 150 | 151 | println!("{}\n", table); 152 | } 153 | 154 | { 155 | #[derive(Tabled)] 156 | struct ContractStemRow { 157 | name: &'static str, 158 | average: u64, 159 | median: u64, 160 | p99: u64, 161 | max: u64, 162 | } 163 | let account_stats = 164 | calculate_stats(&mut stats.iter().map(|a| a.account_stem).collect::>()); 165 | let ss_stats = calculate_stats( 166 | &mut stats 167 | .iter() 168 | .flat_map(|a| a.ss_stems.clone()) 169 | .collect::>(), 170 | ); 171 | 172 | let table = Table::new([ 173 | ContractStemRow { 174 | name: "Accounts header stems", 175 | average: account_stats.average, 176 | median: account_stats.median, 177 | p99: account_stats.p99, 178 | max: account_stats.max, 179 | }, 180 | ContractStemRow { 181 | name: "Storage slots stems", 182 | average: ss_stats.average, 183 | median: ss_stats.median, 184 | p99: ss_stats.p99, 185 | max: ss_stats.max, 186 | }, 187 | ]) 188 | .with(Panel::header("Stems non-zero values count distribution")) 189 | .to_string(); 190 | 191 | println!("{}\n", table); 192 | } 193 | 194 | { 195 | #[derive(Tabled)] 196 | struct SingleSlotStem { 
197 | #[tabled(rename = "Storage-slot stems with single non-zero values")] 198 | single_slot_stems: usize, 199 | } 200 | let table = Table::new([SingleSlotStem { 201 | single_slot_stems: stats 202 | .iter() 203 | .map(|a| a.ss_stems.iter().filter(|s| **s == 1).count()) 204 | .sum(), 205 | }]) 206 | // .with(Panel::header("Single-slot stems")) 207 | .to_string(); 208 | 209 | println!("{}\n", table); 210 | } 211 | 212 | Ok(()) 213 | } 214 | 215 | #[derive(Debug, Tabled)] 216 | pub struct Stats { 217 | sum: u64, 218 | average: u64, 219 | median: u64, 220 | p99: u64, 221 | max: u64, 222 | } 223 | 224 | /// Computes the sum, integer average, median, 99th percentile and maximum of `data`. 225 | /// 226 | /// Sorts `data` in place as a side effect. The median is the element at index `len / 2` 227 | /// and the p99 is the element at index `floor(len * 0.99)` of the sorted slice. 228 | /// 229 | /// An empty slice yields all-zero stats; without this guard the average would divide by 230 | /// zero and the median/p99 lookups would index out of bounds. 231 | fn calculate_stats<T>(data: &mut [T]) -> Stats 232 | where 233 | T: Copy + Into<u64> + Ord, 234 | { 235 | if data.is_empty() { 236 | return Stats { 237 | sum: 0, 238 | average: 0, 239 | median: 0, 240 | p99: 0, 241 | max: 0, 242 | }; 243 | } 244 | 245 | data.sort(); 246 | let count = data.len() as u64; 247 | let sum: u64 = data.iter().map(|&x| x.into()).sum(); 248 | let average = sum / count; 249 | let median = data[count as usize / 2].into(); 250 | let p99 = data[(count as f64 * 0.99) as usize].into(); 251 | let max = data.last().map_or(0, |&x| x.into()); 252 | 253 | Stats { 254 | sum, 255 | average, 256 | median, 257 | p99, 258 | max, 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /preimages/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "preimages" 3 | version = "0.1.0" 4 | edition = "2021" 5 | keywords = ["ethereum", "stateless", "preimages"] 6 | readme = "README.md" 7 | 8 | [dependencies] 9 | reth-db.workspace = true 10 | reth-db-api.workspace = true 11 | reth-chainspec.workspace = true 12 | reth-provider.workspace = true 13 | reth-node-types.workspace = true 14 | reth-node-ethereum.workspace = true 15 | reth-stages.workspace = true 16 | anyhow.workspace = true 17 | alloy-primitives.workspace = true 18 | alloy-chains.workspace = true 19 | clap = "4.5.23" 20 | indicatif = "0.17.9" 21 | rayon = "1.10.0" 22 | hex = "0.4.3" 23 | 
-------------------------------------------------------------------------------- /preimages/src/cmds.rs: -------------------------------------------------------------------------------- 1 | use crate::iterators::plain::PlainIterator; 2 | use crate::iterators::{AccountStorageItem, PreimageIterator}; 3 | use crate::progress::AddressProgressBar; 4 | use alloy_primitives::{Address, FixedBytes}; 5 | use anyhow::{anyhow, Context, Result}; 6 | use reth_db::mdbx::tx::Tx; 7 | use reth_db::mdbx::RO; 8 | use std::collections::HashMap; 9 | use std::{ 10 | fs::File, 11 | io::{BufReader, BufWriter, Read, Write}, 12 | }; 13 | 14 | pub fn generate(path: &str, it: impl PreimageIterator, mut pb: AddressProgressBar) -> Result<()> { 15 | let mut f = BufWriter::new(File::create(path)?); 16 | for entry in it { 17 | match entry { 18 | Ok(AccountStorageItem::Account(address)) => { 19 | pb.progress(address); 20 | f.write_all(address.as_slice()) 21 | .context("writing address preimage")?; 22 | } 23 | Ok(AccountStorageItem::StorageSlot(_, ss)) => { 24 | f.write_all(ss.as_slice()) 25 | .context("writing storage slot preimage")?; 26 | } 27 | Err(e) => return Err(e), 28 | } 29 | } 30 | Ok(()) 31 | } 32 | 33 | pub fn verify(path: &str, it: impl PreimageIterator, mut pb: AddressProgressBar) -> Result<()> { 34 | let mut reader = BufReader::new(File::open(path)?); 35 | 36 | for entry in it { 37 | match entry { 38 | Ok(AccountStorageItem::Account(addr)) => { 39 | pb.progress(addr); 40 | let mut file_addr: Address = Default::default(); 41 | reader 42 | .read_exact(file_addr.as_mut_slice()) 43 | .context("reading address preimage")?; 44 | 45 | if addr != file_addr.as_slice() { 46 | return Err(anyhow!("Address {} preimage mismatch", file_addr)); 47 | } 48 | } 49 | Ok(AccountStorageItem::StorageSlot(address, ss)) => { 50 | let mut file_ss: FixedBytes<32> = Default::default(); 51 | reader 52 | .read_exact(file_ss.as_mut_slice()) 53 | .context("reading storage slot preimage")?; 54 | if ss != 
file_ss.as_slice() { 55 | return Err(anyhow!( 56 | "Storage slot {} preimage (address: {}) mistmatch", 57 | ss, 58 | address 59 | )); 60 | } 61 | } 62 | Err(e) => return Err(e), 63 | } 64 | } 65 | Ok(()) 66 | } 67 | 68 | pub fn storage_slot_freq(tx: &Tx, top_n_detail: usize) -> Result<()> { 69 | let mut counts: HashMap<[u8; N], u32> = HashMap::new(); 70 | let mut pb = AddressProgressBar::new(false); 71 | let it = PlainIterator::new(tx)?; 72 | let mut total_storage_slots = 0; 73 | for entry in it { 74 | match entry { 75 | Ok(AccountStorageItem::Account(address)) => { 76 | pb.progress(address); 77 | } 78 | Ok(AccountStorageItem::StorageSlot(_, key)) => { 79 | total_storage_slots += 1; 80 | counts 81 | .entry(key.0[0..N].try_into()?) 82 | .and_modify(|e| *e += 1) 83 | .or_insert(1); 84 | if counts.len() > 200_000_000 { 85 | counts.retain(|_, count| *count > 1); 86 | } 87 | } 88 | Err(e) => return Err(e), 89 | } 90 | } 91 | // Only keep storage slots that are _potentially_ worth deduping. 92 | counts.retain(|_, count| *count > 1); 93 | 94 | let mut counts_vec = counts.iter().collect::>(); 95 | counts_vec.sort_unstable_by_key(|(_, v)| std::cmp::Reverse(*v)); 96 | let mut cummulative_count: u64 = 0; 97 | println!( 98 | "Top {} storage slot {}-byte prefix repetitions:", 99 | top_n_detail, N 100 | ); 101 | for e in counts_vec.iter().take(top_n_detail) { 102 | cummulative_count += *e.1 as u64; 103 | println!( 104 | "{}: {} ({:.2}%) ~{}MiB (cumm {:.2}MiB)", 105 | hex::encode(e.0), 106 | e.1, 107 | (*e.1 as f64) / (total_storage_slots as f64) * 100.0, 108 | e.1 * (N as u32) / 1024 / 1024, 109 | cummulative_count * (N as u64) / 1024 / 1024, 110 | ); 111 | } 112 | 113 | Ok(()) 114 | } 115 | -------------------------------------------------------------------------------- /preimages/src/iterators/eip7748.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of the EIP-7748 preimage access sequence iterator. 2 | //! 3 | //! 
This module provides an account and storage slot iterator respecting the order defined in EIP-7748. 4 | //! The ordering can be summarized as: 5 | //! 1. DFS the state tree, until an account is reached. 6 | //! 2. For each account, iterate over its state trie also in DFS order. 7 | //! 8 | //! In summary, the ordering is based by account and storage slot _hash_ (i.e keccak256). 9 | //! 10 | //! Sample output: [hash(account1), hash(account1_ss0), hash(account1_ss1), hash(account2), hash(account3), hash(account3_ss0), ...] 11 | 12 | use alloy_primitives::{keccak256, Address, B256}; 13 | use anyhow::Result; 14 | use rayon::slice::ParallelSliceMut; 15 | use reth_db::mdbx::cursor::Cursor; 16 | use reth_db::mdbx::RO; 17 | use reth_db::{mdbx::tx::Tx, PlainAccountState, PlainStorageState}; 18 | use reth_db_api::cursor::DbCursorRO; 19 | use reth_db_api::transaction::DbTx; 20 | 21 | use super::{AccountStorageItem, PreimageIterator}; 22 | 23 | pub struct Eip7748Iterator { 24 | state: State, 25 | 26 | ordered_addresses: Vec
, 27 | ordered_addresses_idx: usize, 28 | 29 | cursor_storage_slots: Cursor, 30 | buf_storage_slot: Option>, 31 | buf_storage_slot_idx: usize, 32 | } 33 | 34 | enum State { 35 | Account, 36 | StorageSlot(Address), 37 | End, 38 | } 39 | 40 | impl PreimageIterator for Eip7748Iterator {} 41 | 42 | impl Eip7748Iterator { 43 | pub fn new

(tx: &Tx, mut progress: Option

) -> Result 44 | where 45 | P: FnMut(Address), 46 | { 47 | let mut addresses = Vec::with_capacity(300_000_000); 48 | let mut cursor_accounts = tx.cursor_read::()?; 49 | while let Some((address, _)) = cursor_accounts.next()? { 50 | addresses.push((address, keccak256(address))); 51 | if let Some(ref mut progress) = progress { 52 | progress(address); 53 | } 54 | } 55 | addresses.par_sort_by_key(|addr| addr.1); 56 | 57 | Ok(Eip7748Iterator { 58 | state: State::Account, 59 | ordered_addresses: addresses.into_iter().map(|(addr, _)| addr).collect(), 60 | ordered_addresses_idx: 0, 61 | cursor_storage_slots: tx.cursor_read::()?, 62 | buf_storage_slot: None, 63 | buf_storage_slot_idx: 0, 64 | }) 65 | } 66 | } 67 | 68 | impl Iterator for Eip7748Iterator { 69 | type Item = Result; 70 | 71 | fn next(&mut self) -> Option { 72 | match &mut self.state { 73 | State::Account => match self.ordered_addresses.get(self.ordered_addresses_idx) { 74 | Some(address) => { 75 | self.ordered_addresses_idx += 1; 76 | self.state = State::StorageSlot(*address); 77 | Some(Ok(AccountStorageItem::Account(*address))) 78 | } 79 | None => { 80 | self.state = State::End; 81 | None 82 | } 83 | }, 84 | State::StorageSlot(address) => { 85 | let sorted_storage_slots = self.buf_storage_slot.get_or_insert_with(|| { 86 | let mut storage_slots = Vec::with_capacity(1 << 15); 87 | let mut curr = self.cursor_storage_slots.seek(*address).unwrap(); 88 | while let Some((addr, ss)) = curr { 89 | if addr != *address { 90 | break; 91 | } 92 | storage_slots.push((ss.key, keccak256(ss.key))); 93 | curr = self.cursor_storage_slots.next().unwrap(); 94 | } 95 | storage_slots.par_sort_by_key(|(_, hashed_ss)| *hashed_ss); 96 | storage_slots.into_iter().map(|(ss, _)| ss).collect() 97 | }); 98 | 99 | match sorted_storage_slots.get(self.buf_storage_slot_idx) { 100 | Some(key) => { 101 | self.buf_storage_slot_idx += 1; 102 | Some(Ok(AccountStorageItem::StorageSlot(*address, *key))) 103 | } 104 | None => { 105 | 
self.buf_storage_slot = None; 106 | self.buf_storage_slot_idx = 0; 107 | self.state = State::Account; 108 | self.next() 109 | } 110 | } 111 | } 112 | State::End => None, 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /preimages/src/iterators/mod.rs: -------------------------------------------------------------------------------- 1 | //! Multiple iterator implementators to dump the preimages in different orders 2 | //! 3 | //! This crate provides two different implementations of the preimage iterator: 4 | //! - EIP-7748: The iterator respects the order defined in EIP-7748. 5 | //! - Plain: The iterator respects the plain ordering in the database. 6 | //! 7 | //! See each module docs for more information. 8 | 9 | use alloy_primitives::{Address, B256}; 10 | use anyhow::Result; 11 | 12 | pub mod eip7748; 13 | pub mod plain; 14 | 15 | pub enum AccountStorageItem { 16 | Account(Address), 17 | StorageSlot(Address, B256), 18 | } 19 | pub trait PreimageIterator: Iterator> {} 20 | -------------------------------------------------------------------------------- /preimages/src/iterators/plain.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of the plain preimage access sequence iterator. 2 | //! 3 | //! This module provides an account and storage slot iterator respecting the plain ordering in the database. 4 | //! The ordering can be summarized as: 5 | //! 1. Iterate the account sorted by address. 6 | //! 2. For each account, iterate over the sorted storage slots. 7 | //! 8 | //! No actual sorting is required since both addresses and storage slots are naturally sorted in the db. 9 | //! 10 | //! Sample output: [account1, account1_ss0, account1_ss1, account2, account3, account3_ss0, ...] 
11 | 12 | use alloy_primitives::{Address, B256}; 13 | use anyhow::Result; 14 | use reth_db::mdbx::cursor::Cursor; 15 | use reth_db::mdbx::RO; 16 | use reth_db::{mdbx::tx::Tx, PlainAccountState, PlainStorageState}; 17 | use reth_db_api::cursor::DbCursorRO; 18 | use reth_db_api::transaction::DbTx; 19 | 20 | use super::{AccountStorageItem, PreimageIterator}; 21 | 22 | pub struct PlainIterator { 23 | cursor_accounts: Cursor, 24 | cursor_storage_slots: Cursor, 25 | 26 | state: State, 27 | buf_storage_slot: Option<(Address, B256)>, 28 | } 29 | 30 | enum State { 31 | Account, 32 | StorageSlot(Address), 33 | End, 34 | } 35 | 36 | impl PlainIterator { 37 | pub fn new(tx: &Tx) -> Result { 38 | let cursor_accounts = tx.cursor_read::()?; 39 | let cursor_storage_slots = tx.cursor_read::()?; 40 | 41 | Ok(PlainIterator { 42 | cursor_accounts, 43 | cursor_storage_slots, 44 | state: State::Account, 45 | buf_storage_slot: None, 46 | }) 47 | } 48 | } 49 | 50 | impl PreimageIterator for PlainIterator {} 51 | 52 | impl Iterator for PlainIterator { 53 | type Item = Result; 54 | 55 | fn next(&mut self) -> Option { 56 | match &mut self.state { 57 | State::Account => { 58 | let next_account = match self.cursor_accounts.next() { 59 | Ok(account) => account, 60 | Err(e) => return Some(Err(e.into())), 61 | }; 62 | match next_account { 63 | Some((address, _)) => { 64 | self.state = State::StorageSlot(address); 65 | Some(Ok(AccountStorageItem::Account(address))) 66 | } 67 | None => { 68 | self.state = State::End; 69 | None 70 | } 71 | } 72 | } 73 | State::StorageSlot(address) => { 74 | if let Some((addr, key)) = self.buf_storage_slot { 75 | if addr == *address { 76 | self.buf_storage_slot = None; 77 | return Some(Ok(AccountStorageItem::StorageSlot(addr, key))); 78 | } else { 79 | self.state = State::Account; 80 | return self.next(); 81 | } 82 | } 83 | let next_storage_slot = match self.cursor_storage_slots.next() { 84 | Ok(storage_entry) => storage_entry, 85 | Err(e) => return 
Some(Err(e.into())), 86 | }; 87 | match next_storage_slot { 88 | Some((addr, storage_entry)) => { 89 | self.buf_storage_slot = Some((addr, storage_entry.key)); 90 | self.next() 91 | } 92 | None => { 93 | self.state = State::Account; 94 | self.next() 95 | } 96 | } 97 | } 98 | State::End => None, 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /preimages/src/main.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Result}; 2 | use clap::{command, Args, Parser}; 3 | use iterators::{eip7748::Eip7748Iterator, plain::PlainIterator}; 4 | use progress::AddressProgressBar; 5 | use reth_chainspec::ChainSpecBuilder; 6 | use reth_db::{ 7 | mdbx::{tx::Tx, DatabaseArguments, MaxReadTransactionDuration, RO}, 8 | DatabaseEnv, 9 | }; 10 | use reth_node_ethereum::EthereumNode; 11 | use reth_node_types::NodeTypesWithDBAdapter; 12 | use reth_provider::{providers::StaticFileProvider, ProviderFactory, StageCheckpointReader}; 13 | use reth_stages::StageId; 14 | use std::{path::Path, sync::Arc}; 15 | 16 | mod cmds; 17 | mod iterators; 18 | mod progress; 19 | 20 | #[derive(Parser)] 21 | #[command(name = "report")] 22 | struct Cli { 23 | #[arg(short = 'd', long = "datadir", help = "Reth datadir path")] 24 | datadir: String, 25 | 26 | #[command(subcommand)] 27 | subcmd: SubCommand, 28 | } 29 | 30 | #[derive(Parser)] 31 | enum SubCommand { 32 | #[command(name = "generate", about = "Generate preimage file")] 33 | Generate { 34 | #[arg( 35 | long = "output-path", 36 | help = "Preimages file output path", 37 | default_value = "preimages.bin" 38 | )] 39 | path: String, 40 | 41 | #[command(flatten)] 42 | order: OrderArgs, 43 | }, 44 | 45 | #[command(name = "verify", about = "Verify preimage file")] 46 | Verify { 47 | #[arg(long = "path", help = "Preimages file path to verify")] 48 | path: String, 49 | 50 | #[command(flatten)] 51 | order: OrderArgs, 52 | }, 53 | 54 | #[command( 55 
| name = "storage-slot-freq", 56 | about = "Analyze storage-slot 29-byte prefix frequency and size impact" 57 | )] 58 | StorageSlotsFrequency, 59 | } 60 | 61 | #[derive(Args)] 62 | #[group(required = true, multiple = false)] 63 | struct OrderArgs { 64 | #[arg(long, help = "Use plain ordering")] 65 | plain: bool, 66 | #[arg(long, help = "Use EIP-7748 ordering (i.e: hashed)")] 67 | eip7748: bool, 68 | } 69 | 70 | fn main() -> Result<()> { 71 | let cli = Cli::parse(); 72 | 73 | let db_path = Path::new(&cli.datadir).join("db"); 74 | let db = reth_db::open_db_read_only( 75 | db_path.as_ref(), 76 | DatabaseArguments::default() 77 | .with_max_read_transaction_duration(Some(MaxReadTransactionDuration::Unbounded)), 78 | ) 79 | .map_err(|err| anyhow!(err))?; 80 | let spec = ChainSpecBuilder::mainnet().build(); 81 | let factory = ProviderFactory::>>::new( 82 | db.into(), 83 | spec.into(), 84 | StaticFileProvider::read_only(db_path.join("static_files"), true)?, 85 | ); 86 | let provider = factory.provider()?; 87 | 88 | let latest_block_number = provider 89 | .get_stage_checkpoint(StageId::Finish)? 
90 | .map(|ch| ch.block_number) 91 | .ok_or(anyhow!("No finish checkpoint"))?; 92 | println!("Database block number: {:?}", latest_block_number); 93 | 94 | let tx = provider.tx_ref(); 95 | match cli.subcmd { 96 | SubCommand::Generate { path, order } => generate_cmd(tx, &path, order)?, 97 | SubCommand::Verify { path, order } => { 98 | verify_cmd(tx, &path, order)?; 99 | } 100 | SubCommand::StorageSlotsFrequency => cmds::storage_slot_freq::<29>(tx, 1_000)?, 101 | } 102 | 103 | Ok(()) 104 | } 105 | 106 | fn generate_cmd(tx: &Tx, path: &str, order: OrderArgs) -> Result<()> { 107 | if order.plain { 108 | println!("[1/1] Generating preimage file..."); 109 | cmds::generate( 110 | path, 111 | PlainIterator::new(tx)?, 112 | AddressProgressBar::new(false), 113 | )?; 114 | } else if order.eip7748 { 115 | println!("[1/2] Ordering account addresses by hash..."); 116 | let mut pb = AddressProgressBar::new(false); 117 | let it = Eip7748Iterator::new(tx, Some(|addr| pb.progress(addr)))?; 118 | println!("[2/2] Generating preimage file..."); 119 | cmds::generate(path, it, AddressProgressBar::new(true))?; 120 | } else { 121 | return Err(anyhow!("No ordering specified")); 122 | } 123 | Ok(()) 124 | } 125 | 126 | fn verify_cmd(tx: &Tx, path: &str, order: OrderArgs) -> Result<()> { 127 | if order.plain { 128 | println!("[1/2] Verifying provided preimage file..."); 129 | cmds::verify( 130 | path, 131 | PlainIterator::new(tx)?, 132 | AddressProgressBar::new(false), 133 | )?; 134 | println!("[2/2] The preimage file is valid!"); 135 | } else if order.eip7748 { 136 | println!("[1/3] Ordering account addresses by hash..."); 137 | let mut pb = AddressProgressBar::new(false); 138 | let it = Eip7748Iterator::new(tx, Some(|addr| pb.progress(addr)))?; 139 | println!("[2/3] Verifying provided preimage file..."); 140 | cmds::verify(path, it, AddressProgressBar::new(true))?; 141 | println!("[3/3] The preimage file is valid!"); 142 | } else { 143 | return Err(anyhow!("No ordering specified")); 144 | } 
145 | Ok(()) 146 | } 147 | -------------------------------------------------------------------------------- /preimages/src/progress.rs: -------------------------------------------------------------------------------- 1 | use alloy_primitives::{keccak256, Address}; 2 | use indicatif::{ProgressBar, ProgressStyle}; 3 | pub struct AddressProgressBar { 4 | inner: ProgressBar, 5 | hash_on_progress: bool, 6 | } 7 | 8 | impl AddressProgressBar { 9 | pub fn new(hash_on_progress: bool) -> Self { 10 | let inner = ProgressBar::new(0x10000); 11 | inner.set_style( 12 | ProgressStyle::with_template("{bar:50.cyan/blue} {percent}% [eta: {eta}] {msg}") 13 | .expect("Failed to set progress bar style template") 14 | .progress_chars("#>-"), 15 | ); 16 | Self { 17 | inner, 18 | hash_on_progress, 19 | } 20 | } 21 | 22 | pub fn progress(&mut self, addr: Address) { 23 | let hashed_addr = keccak256(addr); 24 | let progress_val = if self.hash_on_progress { 25 | hashed_addr.as_slice() 26 | } else { 27 | addr.as_slice() 28 | }; 29 | self.inner 30 | .set_position(u64::from(progress_val[0]) << 8 | u64::from(progress_val[1])); 31 | self.inner.set_message(hex::encode(progress_val)); 32 | } 33 | } 34 | --------------------------------------------------------------------------------