├── .github └── workflows │ └── rust.yml ├── .gitignore ├── .mise.toml ├── .rustfmt.toml ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches ├── datasets │ └── sepolia-usdc-holders.txt ├── ethui-indexer.toml ├── provider_concurrency.rs ├── usdc_holders_backfill.rs └── utils │ └── mod.rs ├── deny.toml ├── diesel.patch ├── diesel.toml ├── eip712.json ├── ethui-indexer.toml ├── migrations ├── 00000000000000_diesel_initial_setup │ ├── down.sql │ └── up.sql ├── 2023-11-27-093047_enable_extensions │ ├── down.sql │ └── up.sql ├── 2023-11-28-163820_create_accounts │ ├── down.sql │ └── up.sql ├── 2023-11-28-190011_create_txs │ ├── down.sql │ └── up.sql ├── 2023-11-30-220900_chains │ ├── down.sql │ └── up.sql └── 2023-12-01-114852_create_backfill_jobs │ ├── down.sql │ └── up.sql └── src ├── api ├── app.rs ├── app_state.rs ├── auth │ ├── middleware.rs │ ├── mod.rs │ └── signature.rs ├── error.rs ├── mod.rs ├── registration │ └── mod.rs └── test_utils.rs ├── config ├── mod.rs └── whitelist.rs ├── db ├── mod.rs ├── models.rs ├── schema.rs └── types.rs ├── lib.rs ├── main.rs ├── rearrange.rs ├── sync ├── backfill.rs ├── forward.rs ├── mod.rs ├── provider.rs └── utils.rs └── task.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: rust 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - "*" 9 | pull_request: 10 | types: [opened, synchronize] 11 | paths-ignore: 12 | - "*.json" 13 | - "*.md" 14 | - "LICENSE" 15 | merge_group: 16 | paths-ignore: 17 | - "*.json" 18 | - "*.md" 19 | - "LICENSE" 20 | 21 | concurrency: 22 | group: ${{ github.workflow }}-${{ github.ref }} 23 | cancel-in-progress: true 24 | 25 | jobs: 26 | cargo-test: 27 | runs-on: ubuntu-latest 28 | 29 | services: 30 | postgres: 31 | image: postgres 32 | env: 33 | POSTGRES_DB: test-db 34 | POSTGRES_PASSWORD: postgres 35 | ports: 36 | - "5432:5432" 37 | 38 | steps: 39 | - uses: actions/checkout@v3 40 | 41 | - name: install dependencies (ubuntu only) 42 | run: | 43 | sudo apt-get update 44 | sudo apt-get install -y libgtk-3-dev libwebkit2gtk-4.0-dev libappindicator3-dev librsvg2-dev patchelf libsodium-dev 45 | 46 | - uses: dtolnay/rust-toolchain@stable 47 | - uses: Swatinem/rust-cache@v2 48 | with: 49 | shared-key: "debug" 50 | save-if: ${{ github.ref == 'refs/heads/main' }} 51 | 52 | - run: cargo check 53 | - run: cargo clippy 54 | 55 | - run: cargo test 56 | env: 57 | TEST_DATABASE_URL: "postgres://postgres:postgres@localhost/test-db" 58 | 59 | cargo-deny: 60 | runs-on: ubuntu-latest 61 | strategy: 62 | matrix: 63 | checks: 64 | - advisories 65 | - bans licenses sources 66 | 67 | name: cargo-deny (${{ matrix.checks }}) 68 | 69 | # Prevent sudden announcement of a new advisory from failing ci: 70 | continue-on-error: ${{ matrix.checks == 'advisories' }} 71 | 72 | steps: 73 | - uses: actions/checkout@v3 74 | 75 | # check if cargo.lock changed 76 | # only needed in advisories 77 | - uses: dorny/paths-filter@v2 78 | id: lock 79 | with: 80 | filters: | 81 | src: 82 | - '**/Cargo.lock' 83 | if: ${{ matrix.checks == 'advisories' }} 84 | 85 | - uses: EmbarkStudios/cargo-deny-action@v1 86 | with: 87 | command: check ${{ matrix.checks }} 88 | # conditionally run advisories only if lock changed 89 | if: ${{ matrix.checks != 'advisories' || steps.lock.outputs.src == 'true' }} 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 
target 2 | !migrations/**/*.sql 3 | -------------------------------------------------------------------------------- /.mise.toml: -------------------------------------------------------------------------------- 1 | [tasks.run] 2 | alias = "r" 3 | run = "cargo run -- --config ethui-indexer.toml" 4 | 5 | [tasks.sign] 6 | run = "cast wallet sign --private-key $PRIVATE_KEY --data --from-file eip712.json" 7 | 8 | [tasks.test] 9 | alias = "t" 10 | run = "cargo test" 11 | 12 | [env] 13 | _.file = ".env" 14 | -------------------------------------------------------------------------------- /.rustfmt.toml: -------------------------------------------------------------------------------- 1 | imports_granularity = "crate" 2 | group_imports = "StdExternalCrate" 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "ethui-indexer" 3 | edition = "2021" 4 | version = "0.0.1" 5 | license-file = "./LICENSE" 6 | homepage = "https://github.com/ethui" 7 | repository = "https://github.com/ethui/indexer" 8 | exclude = [".github/"] 9 | authors = ["Miguel Palhas "] 10 | 11 | [dependencies] 12 | tokio = { version = "1.33", features = ["full", "sync"] } 13 | tokio-util = { version = "0.7.10", features = ["rt"] } 14 | clap = { version = "4.4.8", features = ["derive", "env"] } 15 | serde = { version = "1.0", features = ["derive", "std"] } 16 | futures = "0.3.28" 17 | toml = "0.8.8" 18 | async-trait = "0.1.74" 19 | 20 | # tracing 21 | color-eyre = "0.6.2" 22 | tracing = "0.1" 23 | tracing-subscriber = { version = "0.3", features = ["env-filter"] } 24 | 25 | # web 26 | axum = { version = "0.7.2", features = ["tracing"] } 27 | tower = "0.4.13" 28 | tower-http = { version = "0.5.0", features = ["cors", "trace"] } 29 | jsonwebtoken = "9.2.0" 30 | serde_json = "1.0.108" 31 | axum-extra = { version = "0.9.0", features = ["typed-header"] } 32 | 33 | # db 34 | diesel = { version = "2.2.3", features = ["postgres", "chrono", "numeric"] } 35 | diesel-async = { version = "0.5", features = [ 36 | "postgres", 37 | "deadpool", 38 | "async-connection-wrapper", 39 | ] } 40 | diesel_migrations = { version = "2.1.0", features = ["postgres"] } 41 | chrono = { version = "0.4.31", features = ["serde"], default-features = false } 42 | bigdecimal = { version = "0.4.2", default-features = false } 43 | 44 | # reth 45 | reth-db = { git = "https://github.com/paradigmxyz/reth", package = "reth-db", tag = "v1.0.5" } 46 | reth-primitives = { git = "https://github.com/paradigmxyz/reth", package = "reth-primitives", tag = "v1.0.5" } 47 | reth_provider = { git = "https://github.com/paradigmxyz/reth", package = "reth-provider", tag = "v1.0.5" } 48 | reth-rpc-types = { git = "https://github.com/paradigmxyz/reth", tag = "v1.0.5" } 49 | reth-chainspec = { git = "https://github.com/paradigmxyz/reth", tag = "v1.0.5" } 50 | 51 | # ethers 52 | ethers-core = { version = "2.0", default-features = false } 53 | ethers-signers = { version = "2.0", default-features = false } 54 | ethers-contract-derive = { version = "2.0", default-features = false } 55 | 56 | # alloy 57 | alloy-primitives = { version = "0.7.2", features = ["serde"] } 58 | 59 | # cuckoo 60 | scalable_cuckoo_filter = "0.2.3" 61 | rand = { version = "0.8.5", default-features = false, features = ["std_rng"] } 62 | thiserror = "1.0.61" 63 | serial_test = "3.1.1" 64 | url = "2.5.2" 65 | 66 | [dev-dependencies] 67 | criterion = { version = "0.5.1", features = 
["async_tokio"] } 68 | lazy_static = "1.4.0" 69 | rstest = "0.18.2" 70 | tempfile = "3.10" 71 | 72 | [[bench]] 73 | name = "provider_concurrency" 74 | harness = false 75 | 76 | [[bench]] 77 | name = "usdc_holders_backfill" 78 | harness = false 79 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Miguel Palhas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ethui Indexer 2 | 3 | [reth]: https://paradigmxyz.github.io/reth/intro.html 4 | [reth-indexer]: https://github.com/joshstevens19/reth-indexer 5 | [ethui]: https://ethui.dev 6 | [miguel]: https://twitter.com/naps62 7 | [cuckoo]: https://en.wikipedia.org/wiki/Cuckoo_filter 8 | 9 | A parallel Reth indexer. 10 | 11 | Reads transaction history from [reth][reth]'s DB (direct from filesystem, skipping network & JSON-RPC overhead). It's able to index from a dynamic set of addresses, which can grow at runtime, by spawning parallel self-optimizing backfill jobs. 12 | 13 | **Note**: Kudos to [reth-indexer][reth-indexer], which was the original implementation that served as a basis for this. 14 | 15 | ## Disclaimer 16 | 17 | This is currently a prototype, and built to serve a yet-to-be-released feature of [ethui][ethui]. All development so far has been with that goal in mind. Don't expect a plug-and-play indexing solution for every use case (at least not right now) 18 | 19 | ## How to use 20 | 21 | 🚧 TODO 🚧 22 | 23 | For now, check `ethui-indexer.toml`, which should help you get started. Feel free to contact [me][miguel] or open issues for any questions. 24 | 25 | ## Why 26 | 27 | Fetching on-chain data can be a painful process. A simple query such as _"what is the transaction history for my wallet address?"_ translates into a time-consuming walk of the entire chain. 28 | It's also not enough to sync the `from` and `to` fields of every transaction (which would already be costly). Relevant transactions for a wallet are also based on the emitted topics, such as an ERC20 transfers. 29 | 30 | On top of this, most indexers require a predetermined set of topics to index, and any changes require a new full walk of the chain. 
31 | 32 | Instead, `ethui-indexer` takes a different approach: new addresses can be added to the sync list at runtime, and self-optimizing backfill jobs are registered to backfill all data for each incoming address. 33 | 34 | ## How 35 | 36 | ### Forward & Backfill workers 37 | 38 | Let's illustrate this with an example: Say we're currently indexing only `alice`'s address. A regular syncing process is running, waiting for new blocks to process. 39 | 40 | After block 10, `bob`'s address is added to the set. From block 11 onwards, both `alice` and `bob` will be matched. But we missed blocks 1 through 10 for `bob`. At this point we register a new backfill job for the missing data. 41 | 42 | We're now at this state: 43 | 44 | | job | account set | block range | 45 | | --------------- | -------------- | --------------- | 46 | | **Forward** | `[alice, bob]` | waiting for #11 | 47 | | **Backfill #1** | `[bob]` | `[1, 10]` | 48 | 49 | The new job starts immediately, in reverse order. 50 | 51 | A few moments later, `carol`'s address joins too. By now both existing jobs have advanced a bit: 52 | 53 | | job | account set | block range | notes | 54 | | --------------- | -------------- | --------------- | ----------------------------------------- | 55 | | **Forward** | `[alice, bob]` | waiting for #16 | | 56 | | **Backfill #1** | `[bob]` | `[1, 5]` | We've synced from 10 to 6 in the meantime | 57 | | **Backfill #2** | `[carol]` | `[1, 15]` | | 58 | 59 | The naive approach would be to start the new job right away and run all 3 concurrently. 60 | This has one drawback, though: both backfill jobs will fetch redundant blocks (1 through 5). 61 | 62 | Instead of starting right away, we run a [reorganization step](https://github.com/ethui/indexer/blob/main/src/rearrange.rs): 63 | 64 | | job | account set | block range | notes | 65 | | --------------- | -------------- | --------------- | -------------------------------------- | 66 | | **Forward** | `[alice, bob]` | waiting for #16 | | 67 | | **Backfill #3** | `[bob,carol]` | `[1, 5]` | The overlapping range in one job... | 68 | | **Backfill #4** | `[carol]` | `[6, 15]` | ...And carol's unique range in another | 69 | 70 | This ensures we never attempt to fetch the same block twice, optimizing IO as much as possible. 71 | 72 | ### Cuckoo filters 73 | 74 | We make use of [Cuckoo filters][cuckoo] for efficient set-membership checks. They are similar to Bloom filters, with additional benefits such as the ability to remove items and lower space overhead. The particular [implementation being used](https://docs.rs/scalable_cuckoo_filter/0.2.3/scalable_cuckoo_filter/index.html) also supports automatic scaling. 75 | 76 | ## Future Work 77 | 78 | ### To be done next 79 | 80 | - [ ] Finish the API 81 | - [ ] Add EIP-712 based authentication 82 | - [ ] Document this a bit better 83 | - [ ] Benchmark on a real mainnet node 84 | 85 | ### Future optimizations 86 | 87 | A few potential optimizations are yet to be done, and should help improve throughput even further: 88 | 89 | - [ ] Split workers into producers and consumers. Currently, each worker alternates between fetching a block and processing it, which is not optimal for IO. (question: is this worth it? or can we just saturate read capacity by setting up more workers?) 90 | - [ ] Work-stealing. If we have a single backfill job walking N blocks, we can split it into Y jobs of N/Y blocks each. This can be done directly in the reorganization step (a simplified sketch of that range-splitting idea is shown below).
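For illustration, here is a minimal, self-contained sketch of the range-splitting idea described above. The `Job` struct and `rearrange` function are simplified stand-ins (string account names, inclusive `u64` ranges) used only for this example; the actual implementation in `src/rearrange.rs` works with the crate's own job and address types.

```rust
use std::collections::BTreeSet;

// Simplified stand-in for a backfill job: a set of accounts plus an
// inclusive block range [low, high].
#[derive(Debug)]
struct Job {
    addresses: BTreeSet<&'static str>,
    low: u64,
    high: u64,
}

// Splits possibly-overlapping jobs into non-overlapping ranges, merging the
// address sets of every job that covers each resulting range.
fn rearrange(jobs: &[Job]) -> Vec<Job> {
    // Every block number where the set of "active" jobs can change.
    let mut cuts = BTreeSet::new();
    for job in jobs {
        cuts.insert(job.low);
        cuts.insert(job.high + 1);
    }
    let cuts: Vec<u64> = cuts.into_iter().collect();

    let mut result = Vec::new();
    for window in cuts.windows(2) {
        let (low, high) = (window[0], window[1] - 1);
        // Union of the address sets of all jobs fully covering [low, high].
        let addresses: BTreeSet<_> = jobs
            .iter()
            .filter(|job| job.low <= low && high <= job.high)
            .flat_map(|job| job.addresses.iter().copied())
            .collect();
        if !addresses.is_empty() {
            result.push(Job { addresses, low, high });
        }
    }
    result
}

fn main() {
    // The situation from the tables above: bob's backfill covers [1, 5],
    // carol's covers [1, 15].
    let jobs = vec![
        Job { addresses: ["bob"].into(), low: 1, high: 5 },
        Job { addresses: ["carol"].into(), low: 1, high: 15 },
    ];

    for job in rearrange(&jobs) {
        println!("{:?} -> [{}, {}]", job.addresses, job.low, job.high);
    }
}
```

Running this on the example prints `{"bob", "carol"} -> [1, 5]` and `{"carol"} -> [6, 15]`, matching the **Backfill #3** and **Backfill #4** rows in the last table.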
91 | 92 | ## Benchmarks 93 | 94 | 🚧 TODO 🚧 95 | 96 | ## Requirements 97 | 98 | - A reth node running on the same machine (requires access to the same filesystem) 99 | - PostgreSQL 100 | 101 | ## License 102 | 103 | [MIT](./LICENSE) License 104 | -------------------------------------------------------------------------------- /benches/ethui-indexer.toml: -------------------------------------------------------------------------------- 1 | [reth] 2 | db = "/mnt/data/eth/sepolia/reth/db" 3 | 4 | [chain] 5 | chain_id = 11155111 6 | start_block = 4800000 7 | 8 | [sync] 9 | buffer_size = 10000 10 | backfill_concurrency = 1000 11 | 12 | [db] 13 | url = "postgres://ethui_indexer:ethui-indexer&12345@localhost/ethui_indexer_test" 14 | -------------------------------------------------------------------------------- /benches/provider_concurrency.rs: -------------------------------------------------------------------------------- 1 | mod utils; 2 | 3 | use std::path::{Path, PathBuf}; 4 | 5 | use color_eyre::Result; 6 | use criterion::*; 7 | use reth_db::open_db_read_only; 8 | use reth_provider::{BlockReader, ProviderFactory, ReceiptProvider, TransactionsProvider}; 9 | use tokio::task; 10 | 11 | async fn run_multiple_providers(blocks: u64, concurrency: usize) -> Result<()> { 12 | let spec = (*reth_primitives::SEPOLIA).clone(); 13 | let path = Path::new("/mnt/data/eth/sepolia/reth/db"); 14 | let static_files = PathBuf::from("/mnt/data/eth/sepolia/reth/static_files"); 15 | let db = open_db_read_only(path, Default::default())?; 16 | 17 | let factory: ProviderFactory = 18 | ProviderFactory::new(db, spec.clone(), static_files).unwrap(); 19 | 20 | let mut handles = Vec::new(); 21 | let blocks_per_task = blocks as usize / concurrency; 22 | (0..concurrency).for_each(|i| { 23 | let provider = factory.provider().unwrap(); 24 | let from = 4700000 + i * blocks_per_task * 2; 25 | let to = from + blocks_per_task; 26 | let handle = task::spawn(async move { 27 | for block in from..to { 28 | let indices = provider.block_body_indices(block as u64).unwrap().unwrap(); 29 | for id in indices.first_tx_num..indices.first_tx_num + indices.tx_count { 30 | let _tx = provider.transaction_by_id_no_hash(id).unwrap(); 31 | let _receipt = provider.receipt(id).unwrap(); 32 | } 33 | //println!("finished {}", block); 34 | } 35 | }); 36 | handles.push(handle); 37 | }); 38 | 39 | for handle in handles { 40 | handle.await.unwrap(); 41 | } 42 | 43 | Ok(()) 44 | } 45 | 46 | /// Processes a total of 1000 blocks in different configurations: 47 | /// - from 1 to 800 concurrent jobs 48 | /// - job size varies accordingly, from 1000 blocks down to a single block per job 49 | fn provider_concurrency(c: &mut Criterion) { 50 | println!("PID: {}", std::process::id()); 51 | let rt = tokio::runtime::Runtime::new().unwrap(); 52 | let mut group = c.benchmark_group("multiple_providers"); 53 | group.sample_size(10); 54 | let blocks = 1000; 55 | group.throughput(Throughput::Elements(blocks)); 56 | 57 | for concurrency in [1usize, 10, 100, 200, 400, 800].into_iter() { 58 | group.bench_with_input( 59 | BenchmarkId::from_parameter(concurrency), 60 | &concurrency, 61 | |b, concurrency| { 62 | b.to_async(&rt) 63 | .iter(|| async move { run_multiple_providers(blocks, *concurrency).await }) 64 | }, 65 | ); 66 | } 67 | 68 | group.finish(); 69 | } 70 | 71 | criterion_group!(benches, provider_concurrency); 72 | criterion_main!(benches); 73 | -------------------------------------------------------------------------------- /benches/usdc_holders_backfill.rs:
-------------------------------------------------------------------------------- 1 | mod utils; 2 | 3 | use std::sync::Arc; 4 | 5 | use color_eyre::Result; 6 | use criterion::*; 7 | use diesel::{ 8 | sql_query, 9 | sql_types::{Array, Bytea, Integer}, 10 | RunQueryDsl, 11 | }; 12 | use ethui_indexer::{ 13 | config::Config, 14 | db::{types::Address, Db}, 15 | sync::{BackfillManager, RethProviderFactory, StopStrategy}, 16 | }; 17 | use tokio::sync::mpsc; 18 | 19 | use self::utils::one_time_setup; 20 | 21 | /// truncates DB 22 | /// seeds 1000 initial users 23 | /// and creates a set of backfill jobs 24 | fn setup(concurrency: usize, jobs: u64, job_size: u64) -> Result { 25 | let (mut config, mut conn) = utils::setup("benches/ethui-indexer.toml")?; 26 | config.sync.backfill_concurrency = concurrency; 27 | 28 | let addresses: Vec
= 29 | std::fs::read_to_string("benches/datasets/sepolia-usdc-holders.txt")? 30 | .lines() 31 | .take(1000) 32 | .map(|l| Address(l.parse().unwrap())) 33 | .collect(); 34 | 35 | // create N non-overlapping jobs 36 | for i in 0..jobs { 37 | // the "+ 1" ensures each job is non-adjacent and does not reorg into a single large block 38 | let start_block = config.chain.start_block as i32 - i as i32 * (job_size as i32 * 2); 39 | sql_query( 40 | "INSERT INTO backfill_jobs (low, high, chain_id, addresses) VALUES ($1, $2, $3, $4)", 41 | ) 42 | .bind::(start_block - job_size as i32) 43 | .bind::(start_block) 44 | .bind::(config.chain.chain_id) 45 | .bind::, _>(&addresses[0..1]) 46 | .execute(&mut conn)?; 47 | } 48 | 49 | Ok(config) 50 | } 51 | 52 | async fn run(config: Config) -> Result<()> { 53 | let (account_tx, _account_rx) = mpsc::unbounded_channel(); 54 | let (job_tx, job_rx) = mpsc::unbounded_channel(); 55 | let db = Db::connect(&config, account_tx, job_tx).await?; 56 | let chain = db.setup_chain(&config.chain).await?; 57 | 58 | let provider_factory = Arc::new(RethProviderFactory::new(&config, &chain)?); 59 | let backfill = BackfillManager::new( 60 | db.clone(), 61 | &config, 62 | provider_factory, 63 | job_rx, 64 | StopStrategy::OnFinish, 65 | ); 66 | 67 | backfill.run().await?; 68 | 69 | Ok(()) 70 | } 71 | 72 | /// Processes a total of 100k blocks in different configurations: 73 | /// - from 1 to 10000 concurrent jobs 74 | /// - job size varies from 1 block to 1000 blocks per job 75 | fn backfill_1000jobsx1000blocks(c: &mut Criterion) { 76 | one_time_setup("benches/ethui-indexer.toml").unwrap(); 77 | 78 | let rt = tokio::runtime::Runtime::new().unwrap(); 79 | let mut group = c.benchmark_group("backfill_1000jobsx1000blocks"); 80 | group.sample_size(10); 81 | let jobs = 128; 82 | let job_size = 40; 83 | group.throughput(Throughput::Elements(jobs * job_size)); 84 | 85 | for concurrency in [1, 16, 32, 64, 128].iter() { 86 | group.bench_with_input( 87 | BenchmarkId::from_parameter(concurrency), 88 | concurrency, 89 | |b, concurrency| { 90 | b.to_async(&rt).iter_batched( 91 | || { 92 | setup(*concurrency, jobs, job_size) 93 | .unwrap_or_else(|e| panic!("{}", e.to_string())) 94 | }, 95 | |config| async move { run(config).await }, 96 | BatchSize::LargeInput, 97 | ) 98 | }, 99 | ); 100 | } 101 | 102 | group.finish(); 103 | } 104 | 105 | criterion_group!(benches, backfill_1000jobsx1000blocks); 106 | criterion_main!(benches); 107 | -------------------------------------------------------------------------------- /benches/utils/mod.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | #![allow(dead_code)] 3 | 4 | use std::{env, path::PathBuf}; 5 | 6 | use color_eyre::{eyre::eyre, Result}; 7 | use diesel::{ 8 | sql_query, 9 | sql_types::{Bytea, Integer}, 10 | Connection, PgConnection, RunQueryDsl, 11 | }; 12 | use diesel_migrations::MigrationHarness; 13 | use ethui_indexer::{ 14 | config::Config, 15 | db::{types::Address, MIGRATIONS}, 16 | }; 17 | 18 | pub fn one_time_setup(config_file: &str) -> Result<()> { 19 | let url = env::var("TEST_DATABASE_URL").expect("TEST_DATABASE_URL must be set"); 20 | let mut conn = PgConnection::establish(&url)?; 21 | let config = Config::read_from(&PathBuf::from(config_file))?; 22 | 23 | conn.run_pending_migrations(MIGRATIONS) 24 | .map(|_| ()) 25 | .map_err(|e| eyre!("{}", e))?; 26 | 27 | sql_query("TRUNCATE TABLE accounts CASCADE").execute(&mut conn)?; 28 | sql_query("TRUNCATE TABLE chains CASCADE").execute(&mut conn)?; 
29 | 30 | sql_query("INSERT INTO chains (chain_id, start_block, last_known_block) VALUES ($1, $2, $3)") 31 | .bind::(config.chain.chain_id) 32 | .bind::(0) 33 | .bind::(config.chain.start_block as i32) 34 | .execute(&mut conn)?; 35 | 36 | let addresses: Vec
= 37 | std::fs::read_to_string("benches/datasets/sepolia-usdc-holders.txt")? 38 | .lines() 39 | .take(1000) 40 | .map(|l| Address(l.parse().unwrap())) 41 | .collect(); 42 | 43 | for address in addresses.iter() { 44 | sql_query("INSERT INTO accounts (address, chain_id) VALUES ($1, $2)") 45 | .bind::(address) 46 | .bind::(config.chain.chain_id) 47 | .execute(&mut conn) 48 | .unwrap(); 49 | } 50 | 51 | Ok(()) 52 | } 53 | 54 | // clear the test database in between each run 55 | // and seeds initial accounts from a file 56 | pub fn setup(config_file: &str) -> Result<(Config, PgConnection)> { 57 | let url = env::var("TEST_DATABASE_URL").expect("TEST_DATABASE_URL must be set"); 58 | let mut conn = PgConnection::establish(&url)?; 59 | 60 | sql_query("TRUNCATE TABLE backfill_jobs CASCADE").execute(&mut conn)?; 61 | sql_query("TRUNCATE TABLE txs CASCADE").execute(&mut conn)?; 62 | 63 | let config = Config::read_from(&PathBuf::from(config_file))?; 64 | 65 | Ok((config, conn)) 66 | } 67 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | [advisories] 2 | vulnerability = "deny" 3 | unmaintained = "warn" 4 | yanked = "warn" 5 | notice = "warn" 6 | 7 | [licenses] 8 | allow = [ 9 | "MIT", 10 | "Unlicense", 11 | "ISC", 12 | "Unicode-DFS-2016", 13 | "Apache-2.0", 14 | "Apache-2.0 WITH LLVM-exception", 15 | "MPL-2.0", 16 | "BSD-2-Clause", 17 | "BSD-3-Clause", 18 | "CC0-1.0", 19 | "OpenSSL", 20 | ] 21 | exceptions = [ 22 | # As stated in https://github.com/gakonst/ethers-rs/blob/master/deny.toml 23 | # CC0 is a permissive license but somewhat unclear status for source code 24 | # so we prefer to not have dependencies using it 25 | # https://tldrlegal.com/license/creative-commons-cc0-1.0-universal 26 | { allow = ["CC0-1.0"], name = "tiny-keccak" }, 27 | ] 28 | 29 | [[licenses.clarify]] 30 | name = "ring" 31 | # SPDX considers OpenSSL to encompass both the OpenSSL and SSLeay licenses 32 | # https://spdx.org/licenses/OpenSSL.html 33 | # ISC - Both BoringSSL and ring use this for their new files 34 | # MIT - "Files in third_party/ have their own licenses, as described therein. The MIT 35 | # license, for third_party/fiat, which, unlike other third_party directories, is 36 | # compiled into non-test libraries, is included below." 37 | # OpenSSL - Obviously 38 | expression = "ISC AND MIT AND OpenSSL" 39 | license-files = [{ path = "LICENSE", hash = 0xbd0eed23 }] 40 | -------------------------------------------------------------------------------- /diesel.patch: -------------------------------------------------------------------------------- 1 | @@ -15,1 +15,1 @@ diesel::table! 
{ 2 | - addresses -> Array>, 3 | + addresses -> Array, 4 | -------------------------------------------------------------------------------- /diesel.toml: -------------------------------------------------------------------------------- 1 | # For documentation on how to configure this file, 2 | # see https://diesel.rs/guides/configuring-diesel-cli 3 | 4 | [print_schema] 5 | file = "src/db/schema.rs" 6 | custom_type_derives = ["diesel::query_builder::QueryId"] 7 | patch_file = "diesel.patch" 8 | 9 | [migrations_directory] 10 | dir = "migrations" 11 | -------------------------------------------------------------------------------- /eip712.json: -------------------------------------------------------------------------------- 1 | { 2 | "types": { 3 | "EIP712Domain": [ 4 | { 5 | "name": "name", 6 | "type": "string" 7 | }, 8 | { 9 | "name": "version", 10 | "type": "string" 11 | }, 12 | { 13 | "name": "chainId", 14 | "type": "uint256" 15 | }, 16 | { 17 | "name": "verifyingContract", 18 | "type": "address" 19 | } 20 | ], 21 | "IndexerAuth": [ 22 | { "name": "address", "type": "address" }, 23 | { "name": "validUntil", "type": "uint64" } 24 | ] 25 | }, 26 | "domain": { 27 | "name": "ethui-indexer", 28 | "version": "1", 29 | "chainId": 1, 30 | "verifyingContract": "0x0000000000000000000000000000000000000000" 31 | }, 32 | "primaryType": "IndexerAuth", 33 | "message": { 34 | "address": "0x0077014b4C74d9b1688847386B24Ed23Fdf14Be8", 35 | "validUntil": 1754484924 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /ethui-indexer.toml: -------------------------------------------------------------------------------- 1 | [reth] 2 | db = "/mnt/data/eth/sepolia/reth/db" 3 | static_files = "/mnt/data/eth/sepolia/reth/static_files" 4 | 5 | [chain] 6 | chain_id = 11155111 7 | start_block = 4700000 8 | 9 | [sync] 10 | buffer_size = 1000 11 | 12 | [http] 13 | port = 8080 14 | jwt_secret_env = "ETHUI_JWT_SECRET" 15 | 16 | [db] 17 | url = "postgres://ethui_indexer:ethui-indexer&12345@localhost/ethui_indexer" 18 | 19 | [payment] 20 | address = "0x0063A660Fb166E9deF01C7B4fd0303B054Ed1B9e" 21 | min_amount = "10000000000000000" # 0.01 ether 22 | 23 | [whitelist] 24 | file = "/home/naps62/ethui/whitelists/lists/all.txt" 25 | -------------------------------------------------------------------------------- /migrations/00000000000000_diesel_initial_setup/down.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 4 | 5 | DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); 6 | DROP FUNCTION IF EXISTS diesel_set_updated_at(); 7 | -------------------------------------------------------------------------------- /migrations/00000000000000_diesel_initial_setup/up.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 
4 | 5 | 6 | 7 | 8 | -- Sets up a trigger for the given table to automatically set a column called 9 | -- `updated_at` whenever the row is modified (unless `updated_at` was included 10 | -- in the modified columns) 11 | -- 12 | -- # Example 13 | -- 14 | -- ```sql 15 | -- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); 16 | -- 17 | -- SELECT diesel_manage_updated_at('users'); 18 | -- ``` 19 | CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ 20 | BEGIN 21 | EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s 22 | FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); 23 | END; 24 | $$ LANGUAGE plpgsql; 25 | 26 | CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ 27 | BEGIN 28 | IF ( 29 | NEW IS DISTINCT FROM OLD AND 30 | NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at 31 | ) THEN 32 | NEW.updated_at := current_timestamp; 33 | END IF; 34 | RETURN NEW; 35 | END; 36 | $$ LANGUAGE plpgsql; 37 | -------------------------------------------------------------------------------- /migrations/2023-11-27-093047_enable_extensions/down.sql: -------------------------------------------------------------------------------- 1 | DROP EXTENSION citext; 2 | -------------------------------------------------------------------------------- /migrations/2023-11-27-093047_enable_extensions/up.sql: -------------------------------------------------------------------------------- 1 | CREATE EXTENSION IF NOT EXISTS citext; 2 | -------------------------------------------------------------------------------- /migrations/2023-11-28-163820_create_accounts/down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE accounts; 2 | -------------------------------------------------------------------------------- /migrations/2023-11-28-163820_create_accounts/up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE accounts ( 2 | address BYTEA NOT NULL, 3 | chain_id INTEGER NOT NULL, 4 | created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 5 | updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 6 | PRIMARY KEY (address, chain_id) 7 | ); 8 | -------------------------------------------------------------------------------- /migrations/2023-11-28-190011_create_txs/down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE txs; 2 | -------------------------------------------------------------------------------- /migrations/2023-11-28-190011_create_txs/up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE txs ( 2 | address BYTEA NOT NULL, 3 | chain_id INTEGER NOT NULL, 4 | hash BYTEA NOT NULL, 5 | block_number INTEGER NOT NULL, 6 | created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 7 | updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 8 | PRIMARY KEY (address, chain_id, hash), 9 | FOREIGN KEY (address, chain_id) REFERENCES accounts (address, chain_id) 10 | ); 11 | -------------------------------------------------------------------------------- /migrations/2023-11-30-220900_chains/down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE chains; 2 | -------------------------------------------------------------------------------- /migrations/2023-11-30-220900_chains/up.sql: -------------------------------------------------------------------------------- 1 | 
CREATE TABLE chains ( 2 | chain_id INTEGER NOT NULL, 3 | start_block INTEGER NOT NULL, 4 | last_known_block INTEGER NOT NULL, 5 | updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 6 | PRIMARY KEY (chain_id) 7 | ); 8 | -------------------------------------------------------------------------------- /migrations/2023-12-01-114852_create_backfill_jobs/down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE backfill_jobs; 2 | -------------------------------------------------------------------------------- /migrations/2023-12-01-114852_create_backfill_jobs/up.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE backfill_jobs ( 2 | id SERIAL NOT NULL, 3 | addresses BYTEA[] NOT NULL, 4 | chain_id INTEGER NOT NULL, 5 | low INTEGER NOT NULL, 6 | high INTEGER NOT NULL, 7 | created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 8 | updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, 9 | PRIMARY KEY (id), 10 | FOREIGN KEY (chain_id) REFERENCES chains (chain_id) 11 | ); 12 | -------------------------------------------------------------------------------- /src/api/app.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr as _; 2 | 3 | use axum::{ 4 | extract::{MatchedPath, State}, 5 | http::Request, 6 | middleware::from_extractor, 7 | response::IntoResponse, 8 | routing::{get, post}, 9 | Extension, Json, Router, 10 | }; 11 | use color_eyre::eyre::eyre; 12 | use ethers_core::types::{Address, Signature}; 13 | use jsonwebtoken::{encode, DecodingKey, EncodingKey, Header}; 14 | use serde::{Deserialize, Serialize}; 15 | use serde_json::json; 16 | use tower_http::{cors::CorsLayer, trace::TraceLayer}; 17 | use tracing::info_span; 18 | 19 | use super::{ 20 | app_state::AppState, 21 | auth::{Claims, IndexerAuth}, 22 | error::{ApiError, ApiResult}, 23 | registration::RegistrationProof, 24 | }; 25 | 26 | pub fn app(jwt_secret: String, state: AppState) -> Router { 27 | let encoding_key = EncodingKey::from_secret(jwt_secret.as_ref()); 28 | let decoding_key = DecodingKey::from_secret(jwt_secret.as_ref()); 29 | 30 | let protected_routes = Router::new() 31 | .route("/test", post(test)) 32 | .route("/history", post(history)) 33 | .route_layer(from_extractor::()); 34 | 35 | let public_routes = Router::new() 36 | .route("/health", get(health)) 37 | .route("/is_whitelisted", get(is_whitelisted)) 38 | .route("/auth", post(auth)) 39 | .route("/register", post(register)); 40 | 41 | Router::new() 42 | .nest("/api", protected_routes) 43 | .nest("/api", public_routes) 44 | .layer(CorsLayer::permissive()) 45 | .layer(Extension(encoding_key)) 46 | .layer(Extension(decoding_key)) 47 | .with_state(state) 48 | .layer( 49 | TraceLayer::new_for_http().make_span_with(|req: &Request<_>| { 50 | // Log the matched route's path (with placeholders not filled in). 51 | // Use request.uri() or OriginalUri if you want the real path. 52 | let matched_path = req 53 | .extensions() 54 | .get::() 55 | .map(MatchedPath::as_str); 56 | 57 | info_span!( 58 | "http_request", 59 | method = ?req.method(), 60 | matched_path, 61 | some_other_field = tracing::field::Empty, 62 | ) 63 | }), 64 | ) 65 | } 66 | 67 | async fn health() -> impl IntoResponse {} 68 | 69 | pub async fn test(State(_state): State) -> impl IntoResponse { 70 | Json(json!({"foo": "bar"})) 71 | } 72 | 73 | pub async fn history( 74 | State(state): State, 75 | Claims { sub: address, ..
}: Claims, 76 | ) -> ApiResult { 77 | let addr = alloy_primitives::Address::from_str(&format!("0x{:x}", address)).unwrap(); 78 | 79 | let history = state.db.history(&addr.into()).await?; 80 | 81 | Ok(Json(json!(history))) 82 | } 83 | 84 | #[derive(Debug, Serialize, Deserialize)] 85 | pub struct IsWhitelistedResponse { 86 | address: Address, 87 | } 88 | 89 | // GET /api/is_whitelisted 90 | pub async fn is_whitelisted( 91 | State(state): State, 92 | Json(IsWhitelistedResponse { address }): Json, 93 | ) -> ApiResult { 94 | let addr = reth_primitives::Address::from_str(&format!("0x{:x}", address)).unwrap(); 95 | 96 | let is_whitelisted = state.config.whitelist.is_whitelisted(&addr); 97 | Ok(Json(json!({ "result": is_whitelisted }))) 98 | } 99 | 100 | #[derive(Debug, Deserialize, Serialize)] 101 | pub struct RegisterRequest { 102 | address: Address, 103 | proof: RegistrationProof, 104 | } 105 | 106 | // POST /api/register 107 | pub async fn register( 108 | State(state): State, 109 | Json(register): Json, 110 | ) -> ApiResult { 111 | let addr = reth_primitives::Address::from_str(&format!("0x{:x}", register.address)).unwrap(); 112 | 113 | register.proof.validate(addr, &state).await?; 114 | 115 | state.db.register(register.address.into()).await?; 116 | 117 | Ok(Json(json!({"result": "success"}))) 118 | } 119 | 120 | #[derive(Debug, Serialize, Deserialize)] 121 | pub struct AuthRequest { 122 | signature: String, 123 | data: IndexerAuth, 124 | } 125 | 126 | #[derive(Debug, Deserialize, Serialize)] 127 | pub struct AuthResponse { 128 | access_token: String, 129 | } 130 | 131 | // POST /api/auth 132 | pub async fn auth( 133 | Extension(encoding_key): Extension, 134 | State(AppState { db, .. }): State, 135 | Json(auth): Json, 136 | ) -> ApiResult { 137 | let sig = Signature::from_str(&auth.signature).map_err(|_| eyre!("Invalid signature"))?; 138 | auth.data 139 | .check(&sig) 140 | .map_err(|_| ApiError::InvalidCredentials)?; 141 | 142 | if !db.is_registered(auth.data.address.into()).await? 
{ 143 | return Err(ApiError::NotRegistered); 144 | } 145 | 146 | let access_token = encode(&Header::default(), &Claims::from(auth.data), &encoding_key)?; 147 | 148 | // Send the authorized token 149 | Ok(Json(AuthResponse { access_token })) 150 | } 151 | 152 | #[cfg(test)] 153 | mod test { 154 | 155 | use axum::{ 156 | body::Body, 157 | http::{Request, StatusCode}, 158 | Router, 159 | }; 160 | use color_eyre::Result; 161 | use ethers_core::types::Address; 162 | use rstest::rstest; 163 | use serde::Serialize; 164 | use serial_test::serial; 165 | use tower::{Service, ServiceExt}; 166 | 167 | use super::AuthRequest; 168 | use crate::{ 169 | api::{ 170 | app::{AuthResponse, RegisterRequest}, 171 | app_state::AppState, 172 | auth::IndexerAuth, 173 | registration::RegistrationProof, 174 | test_utils::{address, now, sign_typed_data, to_json_resp, wrong_address}, 175 | }, 176 | config::Config, 177 | db::Db, 178 | }; 179 | 180 | fn get(uri: &str) -> Request { 181 | Request::builder() 182 | .uri(uri) 183 | .method("GET") 184 | .header("content-type", "application/json") 185 | .body(Body::empty()) 186 | .unwrap() 187 | } 188 | 189 | fn get_with_query(uri: &str, query: T) -> Request { 190 | // let mut url = Url::parse(uri).expect("Invalid URI"); 191 | // let query = serde_json::to_string(&query).expect("failed to serialize query"); 192 | // url.set_query(Some(&query)); 193 | let json = serde_json::to_string(&query).expect("Failed to serialize JSON"); 194 | 195 | Request::builder() 196 | .uri(uri) 197 | .method("GET") 198 | .header("content-type", "application/json") 199 | .body(Body::from(json)) 200 | .unwrap() 201 | } 202 | 203 | fn post(uri: &str, body: B) -> Request { 204 | Request::builder() 205 | .uri(uri) 206 | .method("POST") 207 | .header("content-type", "application/json") 208 | .body(Body::from(serde_json::to_string(&body).unwrap())) 209 | .unwrap() 210 | } 211 | 212 | fn post_with_jwt(uri: &str, jwt: String, body: B) -> Request { 213 | Request::builder() 214 | .uri(uri) 215 | .method("POST") 216 | .header("content-type", "application/json") 217 | .header("Authorization", format!("Bearer {}", jwt)) 218 | .body(Body::from(serde_json::to_string(&body).unwrap())) 219 | .unwrap() 220 | } 221 | 222 | async fn build_app() -> Router { 223 | let jwt_secret = "secret".to_owned(); 224 | let db = Db::connect_test().await.unwrap(); 225 | let config = Config::for_test(); 226 | 227 | let state = AppState { 228 | db, 229 | config, 230 | provider_factory: None, 231 | }; 232 | 233 | super::app(jwt_secret, state) 234 | } 235 | 236 | #[rstest] 237 | #[tokio::test] 238 | #[serial] 239 | async fn test_register(address: Address) -> Result<()> { 240 | let app = build_app().await; 241 | let req = post( 242 | "/api/register", 243 | RegisterRequest { 244 | address, 245 | proof: RegistrationProof::Test, 246 | }, 247 | ); 248 | let resp = app.clone().oneshot(req).await?; 249 | 250 | assert_eq!(resp.status(), StatusCode::OK); 251 | Ok(()) 252 | } 253 | 254 | #[rstest] 255 | #[tokio::test] 256 | #[serial] 257 | async fn test_auth(address: Address, now: u64) -> Result<()> { 258 | let app = build_app().await; 259 | let valid_until = now + 20 * 60; 260 | let data = IndexerAuth::new(address, valid_until); 261 | 262 | let registration = post( 263 | "/api/register", 264 | RegisterRequest { 265 | address, 266 | proof: RegistrationProof::Test, 267 | }, 268 | ); 269 | app.clone().oneshot(registration).await?; 270 | 271 | let auth = post( 272 | "/api/auth", 273 | AuthRequest { 274 | signature: 
sign_typed_data(&data).await?.to_string(), 275 | data, 276 | }, 277 | ); 278 | 279 | let resp = app.oneshot(auth).await?; 280 | assert_eq!(resp.status(), StatusCode::OK); 281 | Ok(()) 282 | } 283 | 284 | #[rstest] 285 | #[tokio::test] 286 | #[serial] 287 | async fn test_auth_twice(address: Address, now: u64) -> Result<()> { 288 | let mut app = build_app().await; 289 | let valid_until = now + 20 * 60; 290 | let data = IndexerAuth::new(address, valid_until); 291 | 292 | let registration = post( 293 | "/api/register", 294 | RegisterRequest { 295 | address, 296 | proof: RegistrationProof::Test, 297 | }, 298 | ); 299 | app.clone().oneshot(registration).await?; 300 | 301 | let req = post( 302 | "/api/auth", 303 | AuthRequest { 304 | signature: sign_typed_data(&data).await?.to_string(), 305 | data: data.clone(), 306 | }, 307 | ); 308 | let req2 = post( 309 | "/api/auth", 310 | AuthRequest { 311 | signature: sign_typed_data(&data).await?.to_string(), 312 | data, 313 | }, 314 | ); 315 | 316 | let resp = app.call(req).await?; 317 | assert_eq!(resp.status(), StatusCode::OK); 318 | 319 | let resp = app.oneshot(req2).await?; 320 | assert_eq!(resp.status(), StatusCode::OK); 321 | Ok(()) 322 | } 323 | 324 | #[rstest] 325 | #[tokio::test] 326 | #[serial] 327 | async fn test_auth_expired_signature(address: Address, now: u64) -> Result<()> { 328 | let app = build_app().await; 329 | let valid_until = now - 20; 330 | let data = IndexerAuth::new(address, valid_until); 331 | 332 | let req = post( 333 | "/api/auth", 334 | AuthRequest { 335 | signature: sign_typed_data(&data).await?.to_string(), 336 | data, 337 | }, 338 | ); 339 | 340 | let resp = app.oneshot(req).await?; 341 | assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); 342 | Ok(()) 343 | } 344 | 345 | #[rstest] 346 | #[tokio::test] 347 | #[serial] 348 | async fn test_auth_invalid_signature(address: Address, now: u64) -> Result<()> { 349 | let app = build_app().await; 350 | let valid_until = now + 20 * 60; 351 | let data = IndexerAuth::new(address, valid_until); 352 | let invalid_data = IndexerAuth::new(Address::zero(), valid_until); 353 | 354 | let req = post( 355 | "/api/auth", 356 | AuthRequest { 357 | signature: sign_typed_data(&invalid_data).await?.to_string(), 358 | data, 359 | }, 360 | ); 361 | 362 | let resp = app.oneshot(req).await?; 363 | assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); 364 | Ok(()) 365 | } 366 | 367 | #[rstest] 368 | #[tokio::test] 369 | #[serial] 370 | async fn test_protected_endpoint_without_auth() -> Result<()> { 371 | let app = build_app().await; 372 | let req = post("/api/test", ()); 373 | let resp = app.oneshot(req).await?; 374 | assert_eq!(resp.status(), StatusCode::UNAUTHORIZED); 375 | Ok(()) 376 | } 377 | 378 | #[rstest] 379 | #[tokio::test] 380 | #[serial] 381 | async fn test_protected_endpoint_with_auth(address: Address, now: u64) -> Result<()> { 382 | let app = build_app().await; 383 | let valid_until = now + 20; 384 | let data = IndexerAuth::new(address, valid_until); 385 | 386 | let registration = post( 387 | "/api/register", 388 | RegisterRequest { 389 | address, 390 | proof: RegistrationProof::Test, 391 | }, 392 | ); 393 | app.clone().oneshot(registration).await?; 394 | 395 | let req = post( 396 | "/api/auth", 397 | AuthRequest { 398 | signature: sign_typed_data(&data).await?.to_string(), 399 | data, 400 | }, 401 | ); 402 | 403 | let resp = app.clone().oneshot(req).await?; 404 | let jwt: AuthResponse = to_json_resp(resp).await?; 405 | // 406 | let req = post_with_jwt("/api/test", jwt.access_token, ()); 407 
| let resp = app.oneshot(req).await?; 408 | assert_eq!(resp.status(), StatusCode::OK); 409 | Ok(()) 410 | } 411 | 412 | #[rstest] 413 | #[tokio::test] 414 | #[serial] 415 | async fn test_unprotected_endpoint() -> Result<()> { 416 | let app = build_app().await; 417 | let req = get("/api/health"); 418 | let resp = app.oneshot(req).await?; 419 | assert_eq!(resp.status(), StatusCode::OK); 420 | Ok(()) 421 | } 422 | 423 | #[rstest] 424 | #[tokio::test] 425 | #[serial] 426 | async fn test_is_whitelisted_endpoint(address: Address) -> Result<()> { 427 | let app = build_app().await; 428 | 429 | let req = get_with_query( 430 | "/api/is_whitelisted", 431 | super::IsWhitelistedResponse { address }, 432 | ); 433 | let resp: serde_json::Value = to_json_resp(app.oneshot(req).await?).await?; 434 | assert_eq!(resp["result"].as_bool(), Some(true)); 435 | 436 | Ok(()) 437 | } 438 | 439 | #[rstest] 440 | #[tokio::test] 441 | #[serial] 442 | async fn test_is_whitelisted_endpoint_wrong_address(wrong_address: Address) -> Result<()> { 443 | let app = build_app().await; 444 | 445 | let req = get_with_query( 446 | "/api/is_whitelisted", 447 | super::IsWhitelistedResponse { 448 | address: wrong_address, 449 | }, 450 | ); 451 | let resp: serde_json::Value = to_json_resp(app.oneshot(req).await?).await?; 452 | assert_eq!(resp["result"].as_bool(), Some(false)); 453 | 454 | Ok(()) 455 | } 456 | } 457 | -------------------------------------------------------------------------------- /src/api/app_state.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::{config::Config, db::Db, sync::RethProviderFactory}; 4 | 5 | #[derive(Clone)] 6 | pub struct AppState { 7 | pub db: Db, 8 | pub config: Config, 9 | pub provider_factory: Option>, 10 | } 11 | -------------------------------------------------------------------------------- /src/api/auth/middleware.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | use axum::{ 3 | extract::FromRequestParts, 4 | http::{request::Parts, StatusCode}, 5 | Extension, RequestPartsExt, 6 | }; 7 | use axum_extra::{ 8 | headers::{authorization::Bearer, Authorization}, 9 | TypedHeader, 10 | }; 11 | use jsonwebtoken::{decode, DecodingKey, Validation}; 12 | 13 | use super::Claims; 14 | 15 | #[async_trait] 16 | impl FromRequestParts for Claims 17 | where 18 | S: Send + Sync, 19 | { 20 | type Rejection = StatusCode; 21 | 22 | async fn from_request_parts(parts: &mut Parts, state: &S) -> Result { 23 | let Extension(key) = Extension::::from_request_parts(parts, state) 24 | .await 25 | .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; 26 | 27 | // Extract the token from the authorization header 28 | let TypedHeader(Authorization(bearer)) = parts 29 | .extract::>>() 30 | .await 31 | .map_err(|_| StatusCode::UNAUTHORIZED)?; 32 | 33 | // Decode the user data 34 | let token_data = decode::(bearer.token(), &key, &Validation::default()) 35 | .map_err(|_| StatusCode::UNAUTHORIZED)?; 36 | 37 | // TODO do we need to verify the claim is not expired? 
38 | 39 | Ok(token_data.claims) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/api/auth/mod.rs: -------------------------------------------------------------------------------- 1 | mod middleware; 2 | mod signature; 3 | 4 | pub use signature::{Claims, IndexerAuth}; 5 | -------------------------------------------------------------------------------- /src/api/auth/signature.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | 3 | use color_eyre::{eyre::bail, Result}; 4 | use ethers_contract_derive::{Eip712, EthAbiType}; 5 | use ethers_core::types::{transaction::eip712::Eip712, Address, Signature}; 6 | use serde::{Deserialize, Serialize}; 7 | 8 | #[derive(Debug, Clone, Eip712, EthAbiType, Serialize, Deserialize)] 9 | #[eip712( 10 | name = "ethui-indexer", 11 | version = "1", 12 | chain_id = 1, 13 | verifying_contract = "0x0000000000000000000000000000000000000000" 14 | )] 15 | pub struct IndexerAuth { 16 | pub address: Address, 17 | pub valid_until: u64, 18 | } 19 | 20 | #[derive(Debug, Serialize, Deserialize)] 21 | pub struct Claims { 22 | pub sub: Address, 23 | pub exp: u64, 24 | } 25 | 26 | impl From for Claims { 27 | fn from(value: IndexerAuth) -> Self { 28 | Self { 29 | sub: value.address, 30 | exp: value.valid_until, 31 | } 32 | } 33 | } 34 | 35 | impl From for IndexerAuth { 36 | fn from(value: Claims) -> Self { 37 | Self { 38 | address: value.sub, 39 | valid_until: value.exp, 40 | } 41 | } 42 | } 43 | 44 | impl IndexerAuth { 45 | #[allow(dead_code)] 46 | pub fn new(address: Address, valid_until: u64) -> Self { 47 | Self { 48 | address, 49 | valid_until, 50 | } 51 | } 52 | 53 | pub fn check(&self, signature: &Signature) -> Result<()> { 54 | self.check_expiration()?; 55 | let hash = self.encode_eip712()?; 56 | signature.verify(hash, self.address)?; 57 | 58 | Ok(()) 59 | } 60 | 61 | fn check_expiration(&self) -> Result<()> { 62 | let now = std::time::SystemTime::now() 63 | .duration_since(std::time::UNIX_EPOCH)? 
64 | .as_secs(); 65 | 66 | if self.valid_until <= now { 67 | bail!("signature timestamp has expired"); 68 | } 69 | 70 | Ok(()) 71 | } 72 | 73 | #[allow(dead_code)] 74 | pub fn get_address(&self) -> reth_primitives::Address { 75 | reth_primitives::Address::from_str(&format!("0x{:x}", self.address)).unwrap() 76 | } 77 | } 78 | 79 | #[cfg(test)] 80 | mod test { 81 | 82 | use color_eyre::Result; 83 | use ethers_core::types::{ 84 | transaction::eip712::{Eip712, TypedData}, 85 | Address, 86 | }; 87 | use rstest::rstest; 88 | 89 | use super::*; 90 | use crate::api::test_utils::{address, now, sign_typed_data}; 91 | 92 | #[rstest] 93 | #[tokio::test] 94 | async fn check_signature(address: Address, now: u64) -> Result<()> { 95 | let data = IndexerAuth::new(address, now + 20); 96 | let signature = sign_typed_data(&data).await?; 97 | 98 | data.check(&signature)?; 99 | Ok(()) 100 | } 101 | 102 | #[rstest] 103 | #[tokio::test] 104 | async fn test_encoding(address: Address, now: u64) -> Result<()> { 105 | let valid_until = now + 5 * 60; 106 | 107 | let json = serde_json::json!( { 108 | "types": { 109 | "EIP712Domain": [ 110 | { 111 | "name": "name", 112 | "type": "string" 113 | }, 114 | { 115 | "name": "version", 116 | "type": "string" 117 | }, 118 | { 119 | "name": "chainId", 120 | "type": "uint256" 121 | }, 122 | { 123 | "name": "verifyingContract", 124 | "type": "address" 125 | } 126 | ], 127 | "IndexerAuth": [ 128 | { 129 | "name": "address", 130 | "type": "address" 131 | }, 132 | { 133 | "name": "validUntil", 134 | "type": "uint64" 135 | } 136 | ] 137 | }, 138 | "primaryType": "IndexerAuth", 139 | "domain": { 140 | "name": "ethui-indexer", 141 | "version": "1", 142 | "chainId": 1, 143 | "verifyingContract": "0x0000000000000000000000000000000000000000", 144 | }, 145 | "message": { 146 | "address": format!("0x{:x}", address), 147 | "validUntil": valid_until 148 | } 149 | }); 150 | 151 | let expected_data: TypedData = serde_json::from_value(json).unwrap(); 152 | let expected_hash = expected_data.encode_eip712()?; 153 | 154 | let data = IndexerAuth::new(address, valid_until); 155 | let hash = data.encode_eip712()?; 156 | 157 | assert_eq!(expected_hash, hash); 158 | Ok(()) 159 | } 160 | 161 | #[rstest] 162 | #[tokio::test] 163 | async fn check_fails_with_expired_timestamp(address: Address, now: u64) -> Result<()> { 164 | let data = IndexerAuth::new(address, now - 20); 165 | 166 | assert!(data.check_expiration().is_err()); 167 | Ok(()) 168 | } 169 | } 170 | -------------------------------------------------------------------------------- /src/api/error.rs: -------------------------------------------------------------------------------- 1 | use axum::{ 2 | http::StatusCode, 3 | response::{IntoResponse, Response}, 4 | }; 5 | 6 | #[derive(Debug, thiserror::Error)] 7 | pub enum ApiError { 8 | #[error("Invalid Credentials")] 9 | InvalidCredentials, 10 | 11 | #[error("Not Registered")] 12 | NotRegistered, 13 | 14 | #[error(transparent)] 15 | Jsonwebtoken(#[from] jsonwebtoken::errors::Error), 16 | 17 | #[error(transparent)] 18 | Unknown(#[from] color_eyre::Report), 19 | } 20 | 21 | pub type ApiResult = Result; 22 | 23 | impl IntoResponse for ApiError { 24 | fn into_response(self) -> Response { 25 | let status_code = match self { 26 | ApiError::NotRegistered | ApiError::InvalidCredentials | ApiError::Jsonwebtoken(_) => { 27 | StatusCode::UNAUTHORIZED 28 | } 29 | ApiError::Unknown(_) => StatusCode::INTERNAL_SERVER_ERROR, 30 | }; 31 | 32 | (status_code, self.to_string()).into_response() 33 | } 34 | } 35 | 
-------------------------------------------------------------------------------- /src/api/mod.rs: -------------------------------------------------------------------------------- 1 | mod app; 2 | mod app_state; 3 | mod auth; 4 | mod error; 5 | mod registration; 6 | mod test_utils; 7 | 8 | use std::{net::SocketAddr, sync::Arc}; 9 | 10 | use tokio::task::JoinHandle; 11 | use tracing::instrument; 12 | 13 | use self::{app::app, app_state::AppState}; 14 | use crate::{config::Config, db::Db, sync::RethProviderFactory}; 15 | 16 | #[allow(clippy::async_yields_async)] 17 | #[instrument(name = "api", skip(db, config, provider_factory), fields(port = config.http.clone().unwrap().port))] 18 | pub async fn start( 19 | db: Db, 20 | config: Config, 21 | provider_factory: Arc, 22 | ) -> JoinHandle> { 23 | let http_config = config.http.clone().unwrap(); 24 | 25 | let addr = SocketAddr::from(([0, 0, 0, 0], http_config.port)); 26 | let listener = tokio::net::TcpListener::bind(addr).await.unwrap(); 27 | 28 | let state = AppState { 29 | db, 30 | config, 31 | provider_factory: Some(provider_factory), 32 | }; 33 | let app = app(http_config.jwt_secret(), state); 34 | 35 | tokio::spawn(async move { axum::serve(listener, app).await }) 36 | } 37 | -------------------------------------------------------------------------------- /src/api/registration/mod.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::{eyre::eyre, Result}; 2 | use reth_primitives::{Address, TransactionSigned, TxHash}; 3 | use reth_provider::TransactionsProvider as _; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | use super::app_state::AppState; 7 | 8 | #[derive(Debug, Serialize, Deserialize)] 9 | #[serde(rename_all = "lowercase")] 10 | pub enum RegistrationProof { 11 | Whitelist, 12 | TxHash(TxHash), 13 | 14 | #[cfg(test)] 15 | Test, 16 | } 17 | 18 | #[allow(unused)] 19 | impl RegistrationProof { 20 | pub async fn validate(&self, address: Address, state: &AppState) -> Result<()> { 21 | match self { 22 | Self::Whitelist => { 23 | if !state.config.whitelist.is_whitelisted(&address) { 24 | return Err(eyre!("Not Whitelisted")); 25 | } 26 | } 27 | 28 | Self::TxHash(hash) => { 29 | let provider = state.provider_factory.clone().unwrap().get()?; 30 | match provider.transaction_by_hash(*hash)? 
{ 31 | Some(tx) => self.validate_tx(address, state, &tx)?, 32 | None => return Err(eyre!("Transaction not found")), 33 | } 34 | } 35 | 36 | #[cfg(test)] 37 | Self::Test => return Ok(()), 38 | }; 39 | 40 | Ok(()) 41 | } 42 | 43 | fn validate_tx( 44 | &self, 45 | address: Address, 46 | state: &AppState, 47 | tx: &TransactionSigned, 48 | ) -> Result<()> { 49 | if tx.recover_signer() != Some(address) { 50 | return Err(eyre!("Transaction origin does not match given address")); 51 | } 52 | 53 | let Some(ref payment_config) = state.config.payment else { 54 | return Ok(()); 55 | }; 56 | 57 | if tx.to() != Some(payment_config.address) { 58 | return Err(eyre!("Transaction must be sent to the payment address")); 59 | } 60 | 61 | if tx.value() < payment_config.min_amount { 62 | return Err(eyre!( 63 | "Transaction value must be at least {}", 64 | payment_config.min_amount 65 | )); 66 | } 67 | 68 | Ok(()) 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/api/test_utils.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | use std::str::FromStr; 4 | 5 | use axum::{ 6 | body::{to_bytes, Body}, 7 | response::Response, 8 | }; 9 | use color_eyre::Result; 10 | use ethers_core::types::{Address, Signature}; 11 | use ethers_signers::{coins_bip39::English, MnemonicBuilder, Signer}; 12 | use serde::de::DeserializeOwned; 13 | 14 | use super::auth::IndexerAuth; 15 | 16 | #[rstest::fixture] 17 | pub fn now() -> u64 { 18 | std::time::SystemTime::now() 19 | .duration_since(std::time::UNIX_EPOCH) 20 | .unwrap() 21 | .as_secs() 22 | } 23 | 24 | #[rstest::fixture] 25 | pub fn address() -> Address { 26 | Address::from_str("0xf39fd6e51aad88f6f4ce6ab8827279cfffb92266").unwrap() 27 | } 28 | 29 | #[rstest::fixture] 30 | pub fn wrong_address() -> Address { 31 | Address::from_str("0x123fd6e51aad88f6f4ce6ab8827279cfffb92266").unwrap() 32 | } 33 | 34 | pub async fn to_json_resp(resp: Response) -> color_eyre::Result { 35 | let bytes = to_bytes(resp.into_body(), usize::MAX).await?; 36 | Ok(serde_json::from_str(std::str::from_utf8(&bytes)?)?) 37 | } 38 | 39 | pub async fn sign_typed_data(data: &IndexerAuth) -> Result { 40 | let mnemonic = String::from("test test test test test test test test test test test junk"); 41 | let derivation_path = String::from("m/44'/60'/0'/0"); 42 | let current_path = format!("{}/{}", derivation_path, 0); 43 | let chain_id = 1_u32; 44 | let signer = MnemonicBuilder::::default() 45 | .phrase(mnemonic.as_ref()) 46 | .derivation_path(¤t_path)? 
47 | .build() 48 | .map(|v| v.with_chain_id(chain_id))?; 49 | 50 | let signature = signer.sign_typed_data(data).await?; 51 | 52 | Ok(signature) 53 | } 54 | -------------------------------------------------------------------------------- /src/config/mod.rs: -------------------------------------------------------------------------------- 1 | mod whitelist; 2 | 3 | use std::path::{Path, PathBuf}; 4 | #[cfg(test)] 5 | use std::str::FromStr; 6 | 7 | use clap::Parser; 8 | use color_eyre::eyre::Result; 9 | use serde::Deserialize; 10 | 11 | pub use self::whitelist::WhitelistConfig; 12 | 13 | #[derive(Debug, clap::Parser)] 14 | struct Args { 15 | #[clap( 16 | long, 17 | default_value = "ethui-indexer.toml", 18 | env = "ETHUI_INDEXER_CONFIG" 19 | )] 20 | config: PathBuf, 21 | } 22 | 23 | #[derive(Deserialize, Clone, Debug)] 24 | pub struct Config { 25 | pub reth: RethConfig, 26 | pub chain: ChainConfig, 27 | pub sync: SyncConfig, 28 | 29 | #[serde(default)] 30 | pub http: Option, 31 | 32 | pub db: DbConfig, 33 | pub whitelist: WhitelistConfig, 34 | pub payment: Option, 35 | } 36 | 37 | #[derive(Deserialize, Clone, Debug)] 38 | pub struct RethConfig { 39 | pub db: PathBuf, 40 | pub static_files: PathBuf, 41 | } 42 | 43 | #[derive(Debug, Clone, Deserialize)] 44 | pub struct ChainConfig { 45 | pub chain_id: i32, 46 | #[serde(default = "default_from_block")] 47 | pub start_block: u64, 48 | } 49 | 50 | #[derive(Deserialize, Clone, Debug)] 51 | pub struct SyncConfig { 52 | #[serde(default = "default_buffer_size")] 53 | pub buffer_size: usize, 54 | 55 | #[serde(default = "default_backfill_concurrency")] 56 | pub backfill_concurrency: usize, 57 | } 58 | 59 | #[derive(Deserialize, Debug, Clone)] 60 | pub struct HttpConfig { 61 | #[serde(default = "default_http_port")] 62 | pub port: u16, 63 | 64 | pub jwt_secret_env: String, 65 | } 66 | 67 | #[derive(Deserialize, Debug, Clone)] 68 | pub struct PaymentConfig { 69 | pub address: reth_primitives::Address, 70 | pub min_amount: alloy_primitives::U256, 71 | } 72 | 73 | impl HttpConfig { 74 | pub fn jwt_secret(&self) -> String { 75 | std::env::var(&self.jwt_secret_env).expect("JWT secret not set") 76 | } 77 | } 78 | 79 | #[derive(Deserialize, Clone, Debug)] 80 | pub struct DbConfig { 81 | pub url: String, 82 | } 83 | 84 | impl Config { 85 | pub fn read() -> Result { 86 | let args = Args::parse(); 87 | 88 | let mut config = Self::read_from(args.config.as_path())?; 89 | config.whitelist.preload()?; 90 | 91 | Ok(config) 92 | } 93 | 94 | pub fn read_from(path: &Path) -> Result { 95 | Ok(toml::from_str(&std::fs::read_to_string(path)?)?) 
96 | } 97 | } 98 | 99 | impl Default for HttpConfig { 100 | fn default() -> Self { 101 | Self { 102 | port: default_http_port(), 103 | jwt_secret_env: "ETHUI_JWT_SECRET".to_owned(), 104 | } 105 | } 106 | } 107 | 108 | fn default_from_block() -> u64 { 109 | 1 110 | } 111 | 112 | fn default_http_port() -> u16 { 113 | 9500 114 | } 115 | 116 | fn default_buffer_size() -> usize { 117 | 1000 118 | } 119 | 120 | fn default_backfill_concurrency() -> usize { 121 | 10 122 | } 123 | 124 | #[cfg(test)] 125 | impl Config { 126 | pub fn for_test() -> Self { 127 | Self { 128 | reth: RethConfig { 129 | db: PathBuf::from("test-db"), 130 | static_files: PathBuf::from("static"), 131 | }, 132 | chain: ChainConfig { 133 | chain_id: 31337, 134 | start_block: 1, 135 | }, 136 | sync: SyncConfig { 137 | buffer_size: 1000, 138 | backfill_concurrency: 10, 139 | }, 140 | http: None, 141 | db: DbConfig { 142 | url: "none".to_owned(), 143 | }, 144 | payment: None, 145 | whitelist: WhitelistConfig::for_test(vec![reth_primitives::Address::from_str( 146 | "0xf39fd6e51aad88f6f4ce6ab8827279cfffb92266", 147 | ) 148 | .unwrap()]), 149 | } 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/config/whitelist.rs: -------------------------------------------------------------------------------- 1 | use std::{path::PathBuf, str::FromStr}; 2 | 3 | use color_eyre::Result; 4 | use reth_primitives::Address; 5 | use serde::Deserialize; 6 | 7 | #[derive(Deserialize, Clone, Debug, Default)] 8 | pub struct WhitelistConfig { 9 | file: Option, 10 | whitelist: Option>, 11 | } 12 | 13 | impl WhitelistConfig { 14 | pub fn is_whitelisted(&self, addr: &Address) -> bool { 15 | self.whitelist.as_ref().map_or(false, |w| w.contains(addr)) 16 | } 17 | 18 | pub(super) fn preload(&mut self) -> Result<()> { 19 | if self.whitelist.is_some() { 20 | return Ok(()); 21 | } 22 | 23 | if let Some(file) = &self.file { 24 | // load the file 25 | let contents = std::fs::read_to_string(file)?; 26 | let whitelist = contents 27 | .lines() 28 | .map(|line| { 29 | let addr = line.split_whitespace().next().unwrap(); 30 | Address::from_str(addr) 31 | }) 32 | .collect::, _>>()?; 33 | 34 | self.whitelist = Some(whitelist); 35 | } 36 | 37 | Ok(()) 38 | } 39 | 40 | #[cfg(test)] 41 | pub fn for_test(whitelist: Vec
) -> Self { 42 | Self { 43 | file: None, 44 | whitelist: Some(whitelist), 45 | } 46 | } 47 | } 48 | 49 | #[cfg(test)] 50 | mod test { 51 | use std::io::Write; 52 | 53 | use tempfile::NamedTempFile; 54 | 55 | use super::*; 56 | 57 | #[test] 58 | fn test_preload() -> Result<()> { 59 | let mut file = NamedTempFile::new()?; 60 | writeln!( 61 | file, 62 | "0x0063A660Fb166E9deF01C7B4fd0303B054Ed1B9e (ethui.eth)" 63 | )?; 64 | 65 | let path = file.path().to_path_buf(); 66 | 67 | let mut config = WhitelistConfig { 68 | file: Some(path), 69 | whitelist: None, 70 | }; 71 | 72 | config.preload()?; 73 | 74 | let expected_addr = Address::from_str("0x0063A660Fb166E9deF01C7B4fd0303B054Ed1B9e")?; 75 | assert!(config.whitelist.is_some()); 76 | assert_eq!(config.whitelist, Some(vec![expected_addr])); 77 | assert!(config.is_whitelisted(&expected_addr)); 78 | 79 | Ok(()) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/db/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod models; 2 | mod schema; 3 | pub mod types; 4 | 5 | use color_eyre::{eyre::eyre, Result}; 6 | use diesel::{delete, insert_into, prelude::*, update}; 7 | use diesel_async::{ 8 | pooled_connection::{deadpool::Pool, AsyncDieselConnectionManager}, 9 | scoped_futures::ScopedFutureExt, 10 | AsyncConnection, AsyncPgConnection, RunQueryDsl, 11 | }; 12 | use diesel_migrations::{embed_migrations, EmbeddedMigrations, MigrationHarness}; 13 | use models::Txs; 14 | use tokio::sync::mpsc::UnboundedSender; 15 | use tracing::instrument; 16 | 17 | use self::{ 18 | models::{Chain, CreateTx}, 19 | types::Address, 20 | }; 21 | use crate::{ 22 | config::{ChainConfig, Config}, 23 | db::models::{BackfillJob, BackfillJobWithChainId, BackfillJobWithId}, 24 | }; 25 | 26 | pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("migrations"); 27 | 28 | /// An abstract DB connection 29 | /// In production, `PgBackend` is meant to be used, but the trait allows for the existance of 30 | /// `InMemoryBackend` as well, which is useful for testing 31 | #[derive(Clone)] 32 | pub struct Db { 33 | /// async db pool 34 | pool: Pool, 35 | 36 | /// notify sync job of new accounts 37 | new_accounts_tx: Option>, 38 | 39 | /// notify backfill job of new jobs 40 | /// (which are created from new accounts, but asynchronously, so need their own event) 41 | /// payload is empty because the job only needs a notification to rearrange from DB data 42 | new_job_tx: Option>, 43 | 44 | /// chain ID we're running on 45 | chain_id: i32, 46 | } 47 | 48 | impl Db { 49 | pub async fn connect( 50 | config: &Config, 51 | new_accounts_tx: UnboundedSender, 52 | new_job_tx: UnboundedSender<()>, 53 | ) -> Result { 54 | Self::migrate(&config.db.url).await?; 55 | 56 | let db_config = 57 | AsyncDieselConnectionManager::::new(config.db.url.clone()); 58 | let pool = Pool::builder(db_config).build()?; 59 | 60 | Ok(Self { 61 | pool, 62 | new_accounts_tx: Some(new_accounts_tx), 63 | new_job_tx: Some(new_job_tx), 64 | chain_id: config.chain.chain_id, 65 | }) 66 | } 67 | 68 | #[cfg(test)] 69 | pub async fn connect_test() -> Result { 70 | let db_url = std::env::var("TEST_DATABASE_URL").expect("TEST_DATABASE_URL not set"); 71 | Self::migrate(&db_url).await?; 72 | let db_config = AsyncDieselConnectionManager::::new(db_url); 73 | let pool = Pool::builder(db_config).build()?; 74 | 75 | let res = Self { 76 | pool, 77 | new_accounts_tx: None, 78 | new_job_tx: None, 79 | chain_id: 31337, 80 | }; 81 | 82 | 
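// wipe all tables so each unit test starts from a clean database (see `truncate` below)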
res.truncate().await?; 83 | Ok(res) 84 | } 85 | 86 | #[instrument(skip(url))] 87 | async fn migrate(url: &str) -> Result<()> { 88 | let url = url.to_owned(); 89 | 90 | tokio::task::spawn_blocking(move || { 91 | let mut conn = PgConnection::establish(&url).expect("Failed to connect to DB"); 92 | conn.run_pending_migrations(MIGRATIONS) 93 | .map(|_| ()) 94 | .map_err(|e| eyre!("{}", e)) 95 | }) 96 | .await??; 97 | 98 | Ok(()) 99 | } 100 | 101 | /// Truncate all tables 102 | /// to be called before each unit test 103 | #[cfg(test)] 104 | async fn truncate(&self) -> Result<()> { 105 | use diesel::sql_query; 106 | 107 | let mut conn = self.pool.get().await?; 108 | for table in ["accounts", "chains", "backfill_jobs", "txs"].iter() { 109 | sql_query(format!("TRUNCATE TABLE {} CASCADE", table)) 110 | .execute(&mut conn) 111 | .await 112 | .unwrap(); 113 | } 114 | Ok(()) 115 | } 116 | 117 | /// Seeds the database with a chain configuration 118 | /// Skips if the chain already exists 119 | /// Returns the new or existing chain configuration 120 | #[instrument(skip(self, chain), fields(chain_id = chain.chain_id, start_block = chain.start_block))] 121 | pub async fn setup_chain(&self, chain: &ChainConfig) -> Result { 122 | use schema::chains::dsl::*; 123 | 124 | let mut conn = self.pool.get().await?; 125 | 126 | let res = insert_into(chains) 127 | .values(( 128 | chain_id.eq(chain.chain_id), 129 | start_block.eq(chain.start_block as i32), 130 | last_known_block.eq(chain.start_block as i32 - 1), 131 | )) 132 | .on_conflict_do_nothing() 133 | .execute(&mut conn) 134 | .await; 135 | 136 | handle_error(res).await?; 137 | 138 | let res: Chain = schema::chains::table 139 | .filter(chain_id.eq(chain.chain_id)) 140 | .select(Chain::as_select()) 141 | .first(&mut conn) 142 | .await?; 143 | 144 | Ok(res) 145 | } 146 | 147 | /// Updates the last known block for a chain 148 | #[instrument(skip(self, id))] 149 | pub async fn update_chain(&self, id: u64, last_known: u64) -> Result<()> { 150 | use schema::chains::dsl::*; 151 | 152 | let mut conn = self.pool.get().await?; 153 | 154 | let res = update(chains) 155 | .filter(chain_id.eq(id as i32)) 156 | .set(last_known_block.eq(last_known as i32)) 157 | .execute(&mut conn) 158 | .await; 159 | 160 | handle_error(res).await 161 | } 162 | 163 | /// Register a new account 164 | #[instrument(skip(self))] 165 | pub async fn register(&self, address: Address) -> Result<()> { 166 | use schema::accounts::dsl; 167 | 168 | let mut conn = self.pool.get().await?; 169 | 170 | let res = insert_into(dsl::accounts) 171 | .values((dsl::address.eq(&address), dsl::chain_id.eq(self.chain_id))) 172 | .on_conflict_do_nothing() 173 | .execute(&mut conn) 174 | .await; 175 | 176 | // notify sync job if creation was successful 177 | if let (Ok(_), Some(tx)) = (&res, &self.new_accounts_tx) { 178 | tx.send(address.0)?; 179 | } 180 | 181 | handle_error(res).await 182 | } 183 | 184 | /// Checks if an account is registered 185 | #[instrument(skip(self))] 186 | pub async fn is_registered(&self, address: Address) -> Result { 187 | use schema::accounts::dsl; 188 | 189 | let mut conn = self.pool.get().await?; 190 | 191 | let res: i64 = schema::accounts::table 192 | .filter(dsl::address.eq(&address)) 193 | .filter(dsl::chain_id.eq(self.chain_id)) 194 | .count() 195 | .get_result(&mut conn) 196 | .await?; 197 | 198 | Ok(res > 0) 199 | } 200 | 201 | pub async fn history(&self, address: &Address) -> Result> { 202 | use schema::txs::{self, dsl}; 203 | let mut conn = self.pool.get().await?; 204 | 205 | 
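// all recorded matches for this address on the configured chain, ordered by block number (oldest first)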
Ok(schema::txs::table 206 | .filter(dsl::chain_id.eq(self.chain_id)) 207 | .filter(dsl::address.eq(address)) 208 | .select((txs::address, txs::hash)) 209 | .select(Txs::as_select()) 210 | .order(dsl::block_number.asc()) 211 | .load(&mut conn) 212 | .await?) 213 | } 214 | 215 | pub async fn get_addresses(&self) -> Result> { 216 | use schema::accounts::dsl; 217 | let mut conn = self.pool.get().await?; 218 | 219 | let res = dsl::accounts 220 | .filter(dsl::chain_id.eq(self.chain_id)) 221 | .select(dsl::address) 222 | .load(&mut conn) 223 | .await?; 224 | Ok(res) 225 | } 226 | 227 | #[instrument(skip(self, txs), fields(txs = txs.len()))] 228 | pub async fn create_txs(&self, txs: Vec) -> Result<()> { 229 | use schema::txs::dsl; 230 | let mut conn = self.pool.get().await?; 231 | 232 | let res = insert_into(dsl::txs) 233 | .values(&txs) 234 | .on_conflict_do_nothing() 235 | .execute(&mut conn) 236 | .await; 237 | 238 | handle_error(res).await 239 | } 240 | 241 | #[instrument(skip(self))] 242 | pub async fn create_backfill_job(&self, address: Address, low: i32, high: i32) -> Result<()> { 243 | use schema::backfill_jobs::dsl; 244 | let mut conn = self.pool.get().await?; 245 | 246 | let res = insert_into(dsl::backfill_jobs) 247 | .values(( 248 | dsl::addresses.eq(vec![address]), 249 | dsl::chain_id.eq(self.chain_id), 250 | dsl::low.eq(low), 251 | dsl::high.eq(high), 252 | )) 253 | .on_conflict_do_nothing() 254 | .execute(&mut conn) 255 | .await; 256 | 257 | // notify backfill job new work is available 258 | if let (Ok(_), Some(tx)) = (&res, &self.new_job_tx) { 259 | tx.send(())?; 260 | } 261 | 262 | handle_error(res).await 263 | } 264 | 265 | pub async fn get_backfill_jobs(&self) -> Result> { 266 | use schema::backfill_jobs::dsl; 267 | let mut conn = self.pool.get().await?; 268 | 269 | let res = dsl::backfill_jobs 270 | .filter(dsl::chain_id.eq(self.chain_id)) 271 | .select(BackfillJobWithId::as_select()) 272 | .order(dsl::high.desc()) 273 | .load(&mut conn) 274 | .await?; 275 | 276 | Ok(res) 277 | } 278 | 279 | /// Deletes all existing backfill jobs, and rearranges them for optimal I/O 280 | /// See `utils::rearrange` for more details 281 | #[instrument(skip(self))] 282 | pub async fn reorg_backfill_jobs(&self) -> Result<()> { 283 | use schema::backfill_jobs::dsl; 284 | let mut conn = self.pool.get().await?; 285 | 286 | conn.transaction::<_, diesel::result::Error, _>(|mut conn| { 287 | async move { 288 | let jobs = dsl::backfill_jobs 289 | .filter(dsl::chain_id.eq(self.chain_id)) 290 | .select(BackfillJob::as_select()) 291 | .order(dsl::high.desc()) 292 | .load(&mut conn) 293 | .await?; 294 | 295 | let rearranged = crate::rearrange::rearrange(&jobs); 296 | 297 | delete(dsl::backfill_jobs).execute(&mut conn).await?; 298 | 299 | let rearranged: Vec<_> = rearranged 300 | .into_iter() 301 | .map(|j| BackfillJobWithChainId { 302 | addresses: j.addresses, 303 | chain_id: self.chain_id, 304 | low: j.low, 305 | high: j.high, 306 | }) 307 | .collect(); 308 | 309 | insert_into(dsl::backfill_jobs) 310 | .values(&rearranged) 311 | .execute(&mut conn) 312 | .await?; 313 | 314 | Ok(()) 315 | } 316 | .scope_boxed() 317 | }) 318 | .await?; 319 | 320 | Ok(()) 321 | } 322 | 323 | /// Updates the to_block for a backfill job 324 | pub async fn update_job(&self, id: i32, high: u64) -> Result<()> { 325 | use schema::backfill_jobs::dsl; 326 | let mut conn = self.pool.get().await?; 327 | 328 | let res = update(dsl::backfill_jobs) 329 | .filter(dsl::id.eq(id)) 330 | .set(dsl::high.eq(high as i32)) 331 | .execute(&mut conn) 
332 | .await; 333 | handle_error(res).await 334 | } 335 | } 336 | 337 | async fn handle_error(res: diesel::QueryResult) -> Result<()> { 338 | match res { 339 | Ok(_) => Ok(()), 340 | Err(diesel::result::Error::DatabaseError( 341 | diesel::result::DatabaseErrorKind::ForeignKeyViolation, 342 | _, 343 | )) => Ok(()), 344 | Err(e) => Err(e)?, 345 | } 346 | } 347 | 348 | impl std::fmt::Debug for Db { 349 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 350 | f.debug_struct("Db") 351 | .field("chain_id", &self.chain_id) 352 | .finish() 353 | } 354 | } 355 | -------------------------------------------------------------------------------- /src/db/models.rs: -------------------------------------------------------------------------------- 1 | use diesel::{pg::Pg, prelude::*}; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | use super::{ 5 | schema::{accounts, backfill_jobs, chains, txs}, 6 | types::{Address, B256}, 7 | }; 8 | 9 | #[derive(Debug, Queryable, Selectable, Serialize)] 10 | #[diesel(table_name = accounts, check_for_backend(Pg))] 11 | pub struct Account { 12 | pub address: Address, 13 | pub chain_id: i32, 14 | pub created_at: chrono::NaiveDateTime, 15 | pub updated_at: chrono::NaiveDateTime, 16 | } 17 | 18 | #[derive(Debug, Queryable, Selectable, Serialize)] 19 | #[diesel(table_name = txs, check_for_backend(Pg))] 20 | pub struct Txs { 21 | pub address: Address, 22 | pub chain_id: i32, 23 | pub hash: B256, 24 | pub block_number: i32, 25 | pub created_at: chrono::NaiveDateTime, 26 | pub updated_at: chrono::NaiveDateTime, 27 | } 28 | 29 | #[derive(Debug, Deserialize, Insertable)] 30 | #[diesel(table_name = txs, check_for_backend(Pg))] 31 | pub struct CreateTx { 32 | pub address: Address, 33 | pub chain_id: i32, 34 | pub hash: B256, 35 | pub block_number: i32, 36 | } 37 | 38 | #[derive(Debug, Queryable, Selectable)] 39 | #[diesel(table_name = chains, check_for_backend(Pg))] 40 | pub struct Chain { 41 | pub chain_id: i32, 42 | pub start_block: i32, 43 | pub last_known_block: i32, 44 | #[allow(dead_code)] 45 | pub updated_at: chrono::NaiveDateTime, 46 | } 47 | 48 | #[derive(Debug, Queryable, Selectable, Insertable, Clone)] 49 | #[diesel(table_name = backfill_jobs, check_for_backend(Pg))] 50 | pub struct BackfillJob { 51 | pub addresses: Vec
<Address>, 52 | 53 | /// The low (oldest) block number 54 | pub low: i32, 55 | 56 | /// The high (newest) block number 57 | pub high: i32, 58 | } 59 | 60 | #[derive(Debug, Insertable, Clone)] 61 | #[diesel(table_name = backfill_jobs, check_for_backend(Pg))] 62 | pub struct BackfillJobWithChainId { 63 | pub addresses: Vec<Address>
, 64 | 65 | pub chain_id: i32, 66 | 67 | /// The low (oldest) block number 68 | pub low: i32, 69 | 70 | /// The high (newest) block number 71 | pub high: i32, 72 | } 73 | 74 | #[derive(Debug, Queryable, Selectable, Insertable, Clone)] 75 | #[diesel(table_name = backfill_jobs, check_for_backend(Pg))] 76 | pub struct BackfillJobWithId { 77 | pub id: i32, 78 | pub addresses: Vec<Address>
, 79 | 80 | /// The low (oldest) block number 81 | pub low: i32, 82 | 83 | /// The high (newest) block number 84 | pub high: i32, 85 | } 86 | -------------------------------------------------------------------------------- /src/db/schema.rs: -------------------------------------------------------------------------------- 1 | // @generated automatically by Diesel CLI. 2 | 3 | diesel::table! { 4 | accounts (address, chain_id) { 5 | address -> Bytea, 6 | chain_id -> Int4, 7 | created_at -> Timestamp, 8 | updated_at -> Timestamp, 9 | } 10 | } 11 | 12 | diesel::table! { 13 | backfill_jobs (id) { 14 | id -> Int4, 15 | addresses -> Array, 16 | chain_id -> Int4, 17 | low -> Int4, 18 | high -> Int4, 19 | created_at -> Timestamp, 20 | updated_at -> Timestamp, 21 | } 22 | } 23 | 24 | diesel::table! { 25 | chains (chain_id) { 26 | chain_id -> Int4, 27 | start_block -> Int4, 28 | last_known_block -> Int4, 29 | updated_at -> Timestamp, 30 | } 31 | } 32 | 33 | diesel::table! { 34 | txs (address, chain_id, hash) { 35 | address -> Bytea, 36 | chain_id -> Int4, 37 | hash -> Bytea, 38 | block_number -> Int4, 39 | created_at -> Timestamp, 40 | updated_at -> Timestamp, 41 | } 42 | } 43 | 44 | diesel::joinable!(backfill_jobs -> chains (chain_id)); 45 | 46 | diesel::allow_tables_to_appear_in_same_query!(accounts, backfill_jobs, chains, txs,); 47 | -------------------------------------------------------------------------------- /src/db/types.rs: -------------------------------------------------------------------------------- 1 | use std::str::FromStr; 2 | 3 | use bigdecimal::BigDecimal; 4 | use diesel::{ 5 | deserialize::{self, FromSql, FromSqlRow}, 6 | expression::AsExpression, 7 | pg::{Pg, PgValue}, 8 | serialize::{self, Output, ToSql}, 9 | sql_types::{Bytea, Numeric}, 10 | }; 11 | use serde::{Deserialize, Serialize}; 12 | 13 | #[derive(Debug, Deserialize, Serialize, AsExpression, FromSqlRow, Clone)] 14 | #[diesel(sql_type=Bytea)] 15 | pub struct Address(pub alloy_primitives::Address); 16 | 17 | #[derive(Debug, Deserialize, Serialize, AsExpression, FromSqlRow)] 18 | #[diesel(sql_type=Numeric)] 19 | pub struct U256(pub alloy_primitives::U256); 20 | 21 | #[derive(Debug, Deserialize, Serialize, AsExpression, FromSqlRow)] 22 | #[diesel(sql_type=Bytea)] 23 | pub struct B256(pub alloy_primitives::B256); 24 | 25 | impl From for Address { 26 | fn from(value: alloy_primitives::Address) -> Self { 27 | Self(value) 28 | } 29 | } 30 | 31 | impl From for Address { 32 | fn from(value: ethers_core::types::Address) -> Self { 33 | alloy_primitives::Address::from_slice(value.as_bytes()).into() 34 | } 35 | } 36 | 37 | impl From for U256 { 38 | fn from(value: alloy_primitives::U256) -> Self { 39 | Self(value) 40 | } 41 | } 42 | 43 | impl From for B256 { 44 | fn from(value: alloy_primitives::B256) -> Self { 45 | Self(value) 46 | } 47 | } 48 | 49 | impl ToSql for Address { 50 | fn to_sql(&self, out: &mut Output<'_, '_, Pg>) -> serialize::Result { 51 | as ToSql>::to_sql(&self.0.to_vec(), &mut out.reborrow()) 52 | } 53 | } 54 | 55 | impl FromSql for Address { 56 | fn from_sql(bytes: PgValue) -> deserialize::Result { 57 | as FromSql>::from_sql(bytes) 58 | .map(|b| Address(alloy_primitives::Address::from_slice(&b))) 59 | } 60 | } 61 | 62 | impl ToSql for U256 { 63 | fn to_sql(&self, out: &mut Output<'_, '_, Pg>) -> serialize::Result { 64 | let decimal = BigDecimal::from_str(&self.0.to_string())?; 65 | >::to_sql(&decimal, &mut out.reborrow()) 66 | } 67 | } 68 | 69 | impl FromSql for U256 { 70 | fn from_sql(bytes: PgValue) -> 
deserialize::Result { 71 | let bigdecimal = >::from_sql(bytes)?; 72 | 73 | Ok(Self(alloy_primitives::U256::from_str( 74 | &bigdecimal.to_string(), 75 | )?)) 76 | } 77 | } 78 | 79 | impl ToSql for B256 { 80 | fn to_sql(&self, out: &mut Output<'_, '_, Pg>) -> serialize::Result { 81 | as ToSql>::to_sql(&self.0.to_vec(), &mut out.reborrow()) 82 | } 83 | } 84 | 85 | impl FromSql for B256 { 86 | fn from_sql(bytes: PgValue) -> deserialize::Result { 87 | as FromSql>::from_sql(bytes) 88 | .map(|b| B256(alloy_primitives::B256::from_slice(&b))) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod config; 2 | pub mod db; 3 | pub mod rearrange; 4 | pub mod sync; 5 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | mod api; 2 | mod config; 3 | mod db; 4 | mod rearrange; 5 | mod sync; 6 | 7 | use std::sync::Arc; 8 | 9 | use color_eyre::eyre::Result; 10 | use config::Config; 11 | use tokio::{signal, sync::mpsc}; 12 | use tokio_util::{sync::CancellationToken, task::TaskTracker}; 13 | use tracing::info; 14 | use tracing_subscriber::{fmt::format::FmtSpan, EnvFilter}; 15 | 16 | use self::{ 17 | db::Db, 18 | sync::{BackfillManager, Forward, SyncJob}, 19 | }; 20 | use crate::sync::{RethProviderFactory, StopStrategy}; 21 | 22 | #[tokio::main] 23 | async fn main() -> Result<()> { 24 | setup()?; 25 | 26 | let config = Config::read()?; 27 | 28 | // set up a few random things 29 | let (account_tx, account_rx) = mpsc::unbounded_channel(); 30 | let (job_tx, job_rx) = mpsc::unbounded_channel(); 31 | let db = Db::connect(&config, account_tx, job_tx).await?; 32 | let chain = db.setup_chain(&config.chain).await?; 33 | let provider_factory = Arc::new(RethProviderFactory::new(&config, &chain)?); 34 | let token = CancellationToken::new(); 35 | 36 | // setup each task 37 | let sync = Forward::new( 38 | db.clone(), 39 | &config, 40 | chain, 41 | provider_factory.clone(), 42 | account_rx, 43 | token.clone(), 44 | ) 45 | .await?; 46 | let backfill = BackfillManager::new( 47 | db.clone(), 48 | &config, 49 | provider_factory.clone(), 50 | job_rx, 51 | StopStrategy::Token(token.clone()), 52 | ); 53 | let api = config 54 | .clone() 55 | .http 56 | .map(|_| api::start(db.clone(), config, provider_factory.clone())); 57 | 58 | // spawn and track tasks 59 | let tracker = TaskTracker::new(); 60 | tracker.spawn(sync.run()); 61 | tracker.spawn(backfill.run()); 62 | api.map(|t| tracker.spawn(t)); 63 | 64 | // termination handling 65 | signal::ctrl_c().await?; 66 | info!("graceful shutdown initiated..."); 67 | token.cancel(); 68 | tracker.close(); 69 | tracker.wait().await; 70 | 71 | info!("graceful shutdown achieved. 
Closing"); 72 | 73 | Ok(()) 74 | } 75 | 76 | fn setup() -> Result<()> { 77 | color_eyre::install()?; 78 | 79 | let filter = EnvFilter::from_default_env(); 80 | 81 | let subscriber = tracing_subscriber::FmtSubscriber::builder() 82 | .with_env_filter(filter) 83 | .with_span_events(FmtSpan::NEW) 84 | .compact() 85 | .finish(); 86 | tracing::subscriber::set_global_default(subscriber)?; 87 | 88 | Ok(()) 89 | } 90 | -------------------------------------------------------------------------------- /src/rearrange.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{BTreeSet, HashMap}; 2 | 3 | use crate::db::models::BackfillJob; 4 | 5 | /// Assumes jobs are already sorted by from_block 6 | pub fn rearrange(jobs: &[BackfillJob]) -> Vec { 7 | let points = jobs 8 | .iter() 9 | .filter(|j| j.low != j.high) // filter out empty jobs 10 | .fold(BTreeSet::new(), |mut acc, j| { 11 | acc.insert(j.low); 12 | acc.insert(j.high); 13 | acc 14 | }); 15 | 16 | let sorted_points: Vec = points.into_iter().collect(); 17 | 18 | let mut range_map = HashMap::new(); 19 | let mut size = 0; 20 | 21 | for i in 0..sorted_points.len().saturating_sub(1) { 22 | let start = sorted_points[i]; 23 | let end = sorted_points[i + 1]; 24 | let range = start..end; 25 | 26 | let mut addresses = Vec::new(); 27 | for job in jobs.iter() { 28 | if job.low >= end { 29 | continue; 30 | }; 31 | 32 | let job_range = job.low..job.high; 33 | 34 | if job_range.contains(&range.start) && job_range.contains(&(range.end - 1)) { 35 | addresses.extend_from_slice(&job.addresses) 36 | } 37 | } 38 | 39 | size += addresses.len(); 40 | if !addresses.is_empty() { 41 | range_map.insert((start, end), addresses); 42 | } 43 | } 44 | 45 | let mut res = Vec::with_capacity(size); 46 | range_map.into_iter().for_each(|((low, high), addresses)| { 47 | res.push(BackfillJob { 48 | addresses, 49 | low, 50 | high, 51 | }) 52 | }); 53 | 54 | res 55 | } 56 | 57 | #[cfg(test)] 58 | mod tests { 59 | use alloy_primitives::Address; 60 | use rstest::*; 61 | 62 | use super::*; 63 | 64 | #[derive(Debug, PartialEq)] 65 | struct FakeJob(Vec, i32, i32); 66 | 67 | #[derive(Debug)] 68 | struct Fixture { 69 | input: Vec, 70 | output: Vec, 71 | } 72 | 73 | #[fixture] 74 | fn adjacent_jobs_1() -> Fixture { 75 | Fixture { 76 | input: vec![FakeJob(vec![0x1], 0, 10), FakeJob(vec![0x1], 10, 20)], 77 | output: vec![FakeJob(vec![0x1], 0, 10), FakeJob(vec![0x1], 10, 20)], 78 | } 79 | } 80 | 81 | #[fixture] 82 | fn same_range_different_addresses() -> Fixture { 83 | Fixture { 84 | input: vec![FakeJob(vec![0x1], 0, 10), FakeJob(vec![0x2], 0, 10)], 85 | output: vec![FakeJob(vec![0x1, 0x2], 0, 10)], 86 | } 87 | } 88 | 89 | #[fixture] 90 | fn empty_range() -> Fixture { 91 | Fixture { 92 | input: vec![FakeJob(vec![0x1], 0, 0)], 93 | output: vec![], 94 | } 95 | } 96 | 97 | #[fixture] 98 | fn single_block() -> Fixture { 99 | Fixture { 100 | input: vec![FakeJob(vec![0x1], 0, 1)], 101 | output: vec![FakeJob(vec![0x1], 0, 1)], 102 | } 103 | } 104 | 105 | #[fixture] 106 | fn mix1() -> Fixture { 107 | Fixture { 108 | input: vec![FakeJob(vec![0x1], 1, 2), FakeJob(vec![0x2], 1, 3)], 109 | output: vec![FakeJob(vec![0x1, 0x2], 1, 2), FakeJob(vec![0x2], 2, 3)], 110 | } 111 | } 112 | 113 | #[fixture] 114 | fn mix2() -> Fixture { 115 | Fixture { 116 | input: vec![FakeJob(vec![0x1], 1, 10), FakeJob(vec![0x2], 5, 15)], 117 | output: vec![ 118 | FakeJob(vec![0x1], 1, 5), 119 | FakeJob(vec![0x1, 0x2], 5, 10), 120 | FakeJob(vec![0x2], 10, 15), 121 | ], 122 | } 123 | 
} 124 | 125 | #[fixture] 126 | fn mix3() -> Fixture { 127 | Fixture { 128 | input: vec![ 129 | FakeJob(vec![0x1], 10, 20), 130 | FakeJob(vec![0x2], 15, 25), 131 | FakeJob(vec![0x3], 20, 30), 132 | ], 133 | output: vec![ 134 | FakeJob(vec![0x1], 10, 15), 135 | FakeJob(vec![0x1, 0x2], 15, 20), 136 | // FakeJob(vec![0x1, 0x2, 0x3], 20, 20), 137 | FakeJob(vec![0x2, 0x3], 20, 25), 138 | FakeJob(vec![0x3], 25, 30), 139 | ], 140 | } 141 | } 142 | 143 | #[fixture] 144 | fn mix4() -> Fixture { 145 | Fixture { 146 | input: vec![ 147 | FakeJob(vec![0x1], 10, 21), 148 | FakeJob(vec![0x2], 15, 25), 149 | FakeJob(vec![0x3], 20, 30), 150 | ], 151 | output: vec![ 152 | FakeJob(vec![0x1], 10, 15), 153 | FakeJob(vec![0x1, 0x2], 15, 20), 154 | FakeJob(vec![0x1, 0x2, 0x3], 20, 21), 155 | FakeJob(vec![0x2, 0x3], 21, 25), 156 | FakeJob(vec![0x3], 25, 30), 157 | ], 158 | } 159 | } 160 | 161 | #[rstest] 162 | #[case(adjacent_jobs_1())] 163 | #[case(same_range_different_addresses())] 164 | #[case(empty_range())] 165 | #[case(single_block())] 166 | #[case(mix1())] 167 | #[case(mix2())] 168 | #[case(mix3())] 169 | #[case(mix4())] 170 | fn test(#[case] fixture: Fixture) { 171 | let jobs = to_jobs(fixture.input); 172 | let mut result = rearrange(&jobs); 173 | result.sort_by(|j, j2| j.low.cmp(&j2.low)); 174 | 175 | assert_eq!(result.len(), fixture.output.len()); 176 | 177 | for (job, expectation) in result.into_iter().zip(fixture.output.iter()) { 178 | let fake = FakeJob( 179 | job.addresses 180 | .into_iter() 181 | .map(|a| a.0.as_slice()[0]) 182 | .collect(), 183 | job.low, 184 | job.high, 185 | ); 186 | assert_eq!(&fake, expectation); 187 | } 188 | } 189 | 190 | fn to_jobs(ranges: Vec) -> Vec { 191 | ranges 192 | .into_iter() 193 | .map(|FakeJob(ids, low, high)| { 194 | let addresses = ids 195 | .into_iter() 196 | .map(|i| { 197 | let slice = &[i; 20]; 198 | Address::from_slice(slice).into() 199 | }) 200 | .collect(); 201 | 202 | BackfillJob { 203 | low, 204 | high, 205 | addresses, 206 | } 207 | }) 208 | .collect() 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /src/sync/backfill.rs: -------------------------------------------------------------------------------- 1 | use std::{sync::Arc, time::Duration}; 2 | 3 | use async_trait::async_trait; 4 | use color_eyre::eyre::Result; 5 | use reth_provider::HeaderProvider; 6 | use tokio::{ 7 | select, 8 | sync::{mpsc::UnboundedReceiver, RwLock, Semaphore}, 9 | time::sleep, 10 | }; 11 | use tokio_util::sync::CancellationToken; 12 | use tracing::{info, instrument}; 13 | 14 | use super::{RethProviderFactory, SyncJob, Worker}; 15 | use crate::{ 16 | config::Config, 17 | db::{models::BackfillJobWithId, Db}, 18 | }; 19 | 20 | #[derive(Debug)] 21 | pub enum StopStrategy { 22 | /// This mode is used in production, taking a cancellation for graceful shutdowns 23 | Token(CancellationToken), 24 | 25 | /// This mode is only used in benchmarks, where we want to sync only a fixed set of blocks 26 | /// instead of continuouslly waiting for new work 27 | #[allow(dead_code)] 28 | OnFinish, 29 | } 30 | 31 | impl StopStrategy { 32 | fn is_on_finish(&self) -> bool { 33 | matches!(self, StopStrategy::Token(_)) 34 | } 35 | } 36 | 37 | /// Backfill job 38 | /// Walks the blockchain backwards, within a fixed range 39 | /// Processes a list of addresses determined by the rearrangment logic defined in 40 | /// `crate::db::rearrange_backfill` 41 | pub struct BackfillManager { 42 | db: Db, 43 | concurrency: usize, 44 | jobs_rcv: 
UnboundedReceiver<()>, 45 | config: Arc>, 46 | stop: StopStrategy, 47 | provider_factory: Arc, 48 | } 49 | 50 | impl BackfillManager { 51 | pub fn new( 52 | db: Db, 53 | config: &Config, 54 | provider_factory: Arc, 55 | jobs_rcv: UnboundedReceiver<()>, 56 | stop: StopStrategy, 57 | ) -> Self { 58 | Self { 59 | db, 60 | jobs_rcv, 61 | provider_factory, 62 | config: Arc::new(RwLock::new(config.clone())), 63 | concurrency: config.sync.backfill_concurrency, 64 | stop, 65 | } 66 | } 67 | 68 | #[instrument(name = "backfill", skip(self), fields(concurrency = self.concurrency))] 69 | pub async fn run(mut self) -> Result<()> { 70 | loop { 71 | let semaphore = Arc::new(Semaphore::new(self.concurrency)); 72 | let inner_cancel = CancellationToken::new(); 73 | 74 | self.db.reorg_backfill_jobs().await?; 75 | let jobs = self.db.get_backfill_jobs().await?; 76 | 77 | if self.stop.is_on_finish() && jobs.is_empty() { 78 | break; 79 | } 80 | 81 | let workers = jobs 82 | .into_iter() 83 | .map(|job| { 84 | let db = self.db.clone(); 85 | let factory = self.provider_factory.clone(); 86 | let semaphore = semaphore.clone(); 87 | let config = self.config.clone(); 88 | let token = inner_cancel.clone(); 89 | tokio::spawn(async move { 90 | let _permit = semaphore.acquire().await.unwrap(); 91 | if token.is_cancelled() { 92 | return Ok(()); 93 | } 94 | let worker = Backfill::new_worker(db, config, job, factory, token) 95 | .await 96 | .unwrap(); 97 | worker.run().await 98 | }) 99 | }) 100 | .collect::>(); 101 | 102 | // wait for a new job, or a preset delay, whichever comes first 103 | match &self.stop { 104 | // stop when cancellation token signals 105 | // wait for new jobs too, which should be a sign to reorg 106 | // request each job to stop 107 | StopStrategy::Token(token) => { 108 | let timeout = sleep(Duration::from_secs(1)); 109 | select! 
{ 110 | _ = token.cancelled() => {} 111 | _ = timeout => {} 112 | Some(_) = self.jobs_rcv.recv() => {} 113 | } 114 | inner_cancel.cancel(); 115 | for worker in workers { 116 | worker.await.unwrap().unwrap(); 117 | } 118 | 119 | // if we stopped because cancelation token was triggered, end the job for good 120 | if token.is_cancelled() { 121 | info!("closing backfill manager"); 122 | break; 123 | } 124 | } 125 | 126 | // if we stop on finish, no need to do anything here 127 | StopStrategy::OnFinish => { 128 | for worker in workers { 129 | worker.await.unwrap().unwrap(); 130 | } 131 | break; 132 | } 133 | } 134 | } 135 | 136 | Ok(()) 137 | } 138 | } 139 | 140 | #[derive(Debug)] 141 | pub struct Backfill { 142 | job_id: i32, 143 | high: u64, 144 | low: u64, 145 | } 146 | 147 | #[async_trait] 148 | impl SyncJob for Worker { 149 | #[instrument(skip(self), fields(chain_id = self.chain.chain_id))] 150 | async fn run(mut self) -> Result<()> { 151 | for block in (self.inner.low..self.inner.high).rev() { 152 | let provider = self.provider_factory.get()?; 153 | // start by checking shutdown signal 154 | if self.cancellation_token.is_cancelled() { 155 | // the final flush after the loop would skip all the blocks we canceled 156 | // so we flush with the current block instead 157 | self.flush(block).await?; 158 | return Ok(()); 159 | } 160 | 161 | let header = provider.header_by_number(block)?.unwrap(); 162 | self.process_block(&header).await?; 163 | self.maybe_flush(block).await?; 164 | 165 | if block % 10 == 0 { 166 | tokio::task::yield_now().await; 167 | } 168 | } 169 | 170 | self.flush(self.inner.low).await?; 171 | 172 | info!("closing backfill worker"); 173 | Ok(()) 174 | } 175 | } 176 | 177 | impl Worker { 178 | /// if the buffer is sufficiently large, flush it to the database 179 | /// and update chain tip 180 | pub async fn maybe_flush(&mut self, last_block: u64) -> Result<()> { 181 | if self.buffer.len() >= self.buffer_capacity { 182 | self.flush(last_block).await?; 183 | } 184 | 185 | Ok(()) 186 | } 187 | 188 | // empties the buffer and updates chain tip 189 | pub async fn flush(&mut self, last_block: u64) -> Result<()> { 190 | let txs = self.drain_buffer(); 191 | 192 | self.db.create_txs(txs).await?; 193 | self.db.update_job(self.inner.job_id, last_block).await?; 194 | 195 | Ok(()) 196 | } 197 | } 198 | 199 | impl Backfill { 200 | async fn new_worker( 201 | db: Db, 202 | config: Arc>, 203 | job: BackfillJobWithId, 204 | provider_factory: Arc, 205 | cancellation_token: CancellationToken, 206 | ) -> Result> { 207 | let config = config.read().await; 208 | let chain = db.setup_chain(&config.chain).await?; 209 | 210 | let s = Self { 211 | job_id: job.id, 212 | high: job.high as u64, 213 | low: job.low as u64, 214 | }; 215 | 216 | Worker::new(s, db, &config, chain, provider_factory, cancellation_token).await 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /src/sync/forward.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use alloy_primitives::Address; 4 | use async_trait::async_trait; 5 | use color_eyre::eyre::Result; 6 | use reth_provider::HeaderProvider; 7 | use tokio::sync::mpsc::UnboundedReceiver; 8 | use tokio_util::sync::CancellationToken; 9 | use tracing::{info, instrument}; 10 | 11 | use super::{RethProviderFactory, SyncJob, Worker}; 12 | use crate::{ 13 | config::Config, 14 | db::{models::Chain, Db}, 15 | }; 16 | 17 | /// Main sync job 18 | /// Walks the blockchain 
forward, from a pre-configured starting block. 19 | /// Once it reaches the tip, waits continuously for new blocks to process 20 | /// 21 | /// Receives events for newly registered addresses, at which point they are added to the search set 22 | /// and a backfill job is scheduled 23 | #[derive(Debug)] 24 | pub struct Forward { 25 | /// Receiver for account registration events 26 | accounts_rcv: UnboundedReceiver<Address>
, 27 | next_block: u64, 28 | } 29 | 30 | #[async_trait] 31 | impl SyncJob for Worker { 32 | #[instrument(name = "forward", skip(self), fields(chain_id = self.chain.chain_id))] 33 | async fn run(mut self) -> Result<()> { 34 | self.inner.next_block = (self.chain.last_known_block as u64) + 1; 35 | 36 | loop { 37 | if self.cancellation_token.is_cancelled() { 38 | break; 39 | } 40 | 41 | self.process_new_accounts().await?; 42 | 43 | let provider = self.provider_factory.get()?; 44 | match provider.header_by_number(self.inner.next_block)? { 45 | // got a block. process it, only flush if needed 46 | Some(header) => { 47 | self.process_block(&header).await?; 48 | self.maybe_flush().await?; 49 | self.inner.next_block += 1; 50 | } 51 | 52 | // no block found. take the wait chance to flush, and wait for new block 53 | None => { 54 | self.flush().await?; 55 | self.wait_new_block(self.inner.next_block).await?; 56 | } 57 | } 58 | } 59 | 60 | info!("closing"); 61 | Ok(()) 62 | } 63 | } 64 | 65 | impl Worker { 66 | pub async fn process_new_accounts(&mut self) -> Result<()> { 67 | while let Ok(address) = self.inner.accounts_rcv.try_recv() { 68 | self.addresses.insert(address); 69 | self.cuckoo.insert(&address); 70 | self.setup_backfill(address).await?; 71 | } 72 | Ok(()) 73 | } 74 | 75 | /// Create a new job for backfilling history for a new account 76 | /// before the current sync point 77 | async fn setup_backfill(&mut self, address: Address) -> Result<()> { 78 | self.db 79 | .create_backfill_job( 80 | address.into(), 81 | self.chain.start_block, 82 | self.inner.next_block as i32, 83 | ) 84 | .await?; 85 | Ok(()) 86 | } 87 | 88 | /// if the buffer is sufficiently large, flush it to the database 89 | /// and update chain tip 90 | pub async fn maybe_flush(&mut self) -> Result<()> { 91 | if self.buffer.len() >= self.buffer_capacity { 92 | self.flush().await?; 93 | } 94 | 95 | Ok(()) 96 | } 97 | 98 | // empties the buffer and updates chain tip 99 | pub async fn flush(&mut self) -> Result<()> { 100 | let txs = self.drain_buffer(); 101 | 102 | self.db.create_txs(txs).await?; 103 | self.db 104 | .update_chain(self.chain.chain_id as u64, self.inner.next_block) 105 | .await?; 106 | 107 | Ok(()) 108 | } 109 | } 110 | 111 | impl Forward { 112 | pub async fn new( 113 | db: Db, 114 | config: &Config, 115 | chain: Chain, 116 | provider_factory: Arc, 117 | accounts_rcv: UnboundedReceiver
<Address>, 118 | cancellation_token: CancellationToken, 119 | ) -> Result<Worker<Forward>> { 120 | Worker::new( 121 | Forward { 122 | accounts_rcv, 123 | next_block: (chain.last_known_block as u64) + 1, 124 | }, 125 | db, 126 | config, 127 | chain, 128 | provider_factory, 129 | cancellation_token, 130 | ) 131 | .await 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/sync/mod.rs: -------------------------------------------------------------------------------- 1 | mod backfill; 2 | mod forward; 3 | mod provider; 4 | mod utils; 5 | 6 | use std::{ 7 | collections::{BTreeSet, HashSet}, 8 | sync::Arc, 9 | time::Duration, 10 | }; 11 | 12 | use alloy_primitives::{Address, B256}; 13 | use async_trait::async_trait; 14 | pub use backfill::{BackfillManager, StopStrategy}; 15 | use color_eyre::eyre::{eyre, Result}; 16 | pub use forward::Forward; 17 | pub use provider::RethProviderFactory; 18 | use rand::{rngs::StdRng, SeedableRng}; 19 | use reth_primitives::Header; 20 | use reth_provider::{BlockNumReader, BlockReader, ReceiptProvider, TransactionsProvider}; 21 | use scalable_cuckoo_filter::{DefaultHasher, ScalableCuckooFilter, ScalableCuckooFilterBuilder}; 22 | use tokio::time::sleep; 23 | use tokio_util::sync::CancellationToken; 24 | use tracing::trace; 25 | 26 | use crate::{ 27 | config::Config, 28 | db::{ 29 | models::{Chain, CreateTx}, 30 | Db, 31 | }, 32 | }; 33 | 34 | /// Generic sync job state 35 | #[derive(Debug)] 36 | pub struct Worker<T> { 37 | inner: T, 38 | 39 | provider_factory: Arc<RethProviderFactory>, 40 | 41 | /// DB handle 42 | db: Db, 43 | 44 | /// Chain configuration 45 | chain: Chain, 46 | 47 | /// Set of addresses to search for 48 | addresses: BTreeSet<Address>
, 49 | 50 | /// Cuckoo filter for fast address inclusion check 51 | cuckoo: ScalableCuckooFilter, 52 | 53 | /// Buffer holding matches to be written to the database 54 | buffer: Vec, 55 | 56 | /// Desired buffer capacity, and threshold at which to flush it 57 | buffer_capacity: usize, 58 | 59 | /// Cancellation token for graceful shutdown 60 | cancellation_token: CancellationToken, 61 | } 62 | 63 | /// A match between an address and a transaction 64 | #[derive(Debug)] 65 | pub struct Match { 66 | pub address: Address, 67 | pub block_number: u64, 68 | pub hash: B256, 69 | } 70 | 71 | #[async_trait] 72 | pub trait SyncJob { 73 | async fn run(mut self) -> Result<()>; 74 | } 75 | 76 | impl Worker { 77 | async fn new( 78 | inner: T, 79 | db: Db, 80 | config: &Config, 81 | chain: Chain, 82 | provider_factory: Arc, 83 | cancellation_token: CancellationToken, 84 | ) -> Result { 85 | let addresses: BTreeSet<_> = db.get_addresses().await?.into_iter().map(|a| a.0).collect(); 86 | let mut cuckoo = ScalableCuckooFilterBuilder::new() 87 | .initial_capacity(addresses.len()) 88 | .rng(StdRng::from_entropy()) 89 | .finish(); 90 | 91 | addresses.iter().for_each(|addr| { 92 | cuckoo.insert(addr); 93 | }); 94 | 95 | Ok(Self { 96 | inner, 97 | provider_factory, 98 | db, 99 | chain, 100 | addresses, 101 | cuckoo, 102 | buffer: Vec::with_capacity(config.sync.buffer_size), 103 | buffer_capacity: config.sync.buffer_size, 104 | cancellation_token, 105 | }) 106 | } 107 | 108 | pub fn drain_buffer(&mut self) -> Vec { 109 | self.buffer 110 | .drain(..) 111 | .map(|m| CreateTx { 112 | address: m.address.into(), 113 | chain_id: self.chain.chain_id, 114 | hash: m.hash.into(), 115 | block_number: m.block_number as i32, 116 | }) 117 | .collect() 118 | } 119 | 120 | async fn wait_new_block(&mut self, block: u64) -> Result<()> { 121 | trace!(event = "wait", block); 122 | loop { 123 | let provider = self.provider_factory.get()?; 124 | 125 | let latest = provider.last_block_number().unwrap(); 126 | 127 | if latest >= block { 128 | trace!("new block(s) found. from: {}, latest: {}", block, latest); 129 | return Ok(()); 130 | } 131 | 132 | drop(provider); 133 | 134 | sleep(Duration::from_secs(2)).await; 135 | } 136 | } 137 | 138 | async fn process_block(&mut self, header: &Header) -> Result<()> { 139 | let provider = self.provider_factory.get()?; 140 | let indices = match provider.block_body_indices(header.number)? { 141 | Some(indices) => indices, 142 | None => return Err(eyre!("err")), 143 | }; 144 | 145 | for tx_id in indices.first_tx_num..indices.first_tx_num + indices.tx_count { 146 | let tx = match provider.transaction_by_id_no_hash(tx_id)? { 147 | Some(tx) => tx, 148 | None => continue, 149 | }; 150 | 151 | let receipt = match provider.receipt(tx_id)? 
{ 152 | Some(receipt) => receipt, 153 | None => continue, 154 | }; 155 | 156 | let mut addresses: HashSet<_> = receipt 157 | .logs 158 | .into_iter() 159 | .flat_map(|log| { 160 | log.topics() 161 | .iter() 162 | .filter_map(utils::topic_as_address) 163 | .collect::>() 164 | }) 165 | .collect(); 166 | 167 | tx.recover_signer().map(|a| addresses.insert(a)); 168 | tx.to().map(|a| addresses.insert(a)); 169 | 170 | addresses 171 | .into_iter() 172 | .filter(|addr| self.cuckoo.contains(addr)) 173 | .filter(|addr| self.addresses.contains(addr)) 174 | .for_each(|address| { 175 | self.buffer.push(Match { 176 | address, 177 | block_number: header.number, 178 | hash: tx.hash(), 179 | }) 180 | }); 181 | } 182 | 183 | Ok(()) 184 | } 185 | } 186 | -------------------------------------------------------------------------------- /src/sync/provider.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre::{self, Result}; 2 | use reth_db::{ 3 | mdbx::{tx::Tx, RO}, 4 | open_db_read_only, DatabaseEnv, 5 | }; 6 | use reth_provider::{providers::StaticFileProvider, DatabaseProvider, ProviderFactory}; 7 | 8 | use crate::{config::Config, db::models::Chain}; 9 | 10 | /// Wraps a provider to access Reth DB 11 | /// While the indexer is heavily coupled to this particular provider, 12 | /// it still benefits from abstracting it so it can be swapped out for testing purposes 13 | #[derive(Debug)] 14 | pub struct RethProviderFactory { 15 | /// Reth Provider factory 16 | factory: ProviderFactory, 17 | } 18 | 19 | impl RethProviderFactory { 20 | /// Creates a new Reth DB provider 21 | pub fn new(config: &Config, chain: &Chain) -> Result { 22 | let chain_id = chain.chain_id as u64; 23 | let config = &config.reth; 24 | let db = open_db_read_only(&config.db, Default::default())?; 25 | 26 | let spec = match chain_id { 27 | 1 => (*reth_chainspec::MAINNET).clone(), 28 | 11155111 => (*reth_chainspec::SEPOLIA).clone(), 29 | _ => return Err(eyre::eyre!("unsupported chain id {}", chain_id)), 30 | }; 31 | 32 | let static_file_provider = StaticFileProvider::read_only(config.static_files.clone())?; 33 | 34 | let factory: ProviderFactory = 35 | ProviderFactory::new(db, spec, static_file_provider); 36 | 37 | Ok(Self { factory }) 38 | } 39 | 40 | pub fn get(&self) -> Result>> { 41 | Ok(self.factory.provider()?) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/sync/utils.rs: -------------------------------------------------------------------------------- 1 | use alloy_primitives::{Address, FixedBytes}; 2 | 3 | pub(super) fn topic_as_address(topic: &FixedBytes<32>) -> Option
<Address> { 4 | let padding_slice = &topic.as_slice()[0..12]; 5 | let padding: FixedBytes<12> = FixedBytes::from_slice(padding_slice); 6 | 7 | if padding.is_zero() { 8 | Some(Address::from_slice(&topic[12..])) 9 | } else { 10 | None 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/task.rs: -------------------------------------------------------------------------------- 1 | use tokio::task::JoinHandle; 2 | use tokio_util::sync::CancellationToken; 3 | 4 | pub struct Task { 5 | pub handle: JoinHandle<()>, 6 | pub cancellation_token: CancellationToken, 7 | } 8 | --------------------------------------------------------------------------------
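The range-splitting behaviour implemented in src/rearrange.rs can be exercised on its own. The sketch below is a hypothetical, standalone example (not part of the repository); it assumes the crate is consumed as a library under the name ethui_indexer and mirrors the mix2 test fixture: two overlapping single-address jobs are split into disjoint block ranges, so every interval is scanned exactly once regardless of how many addresses requested it.

use alloy_primitives::Address;
use ethui_indexer::{db::models::BackfillJob, rearrange::rearrange};

// Build a single-address job covering the half-open block range `low..high`.
fn job(addr_byte: u8, low: i32, high: i32) -> BackfillJob {
    BackfillJob {
        addresses: vec![Address::from_slice(&[addr_byte; 20]).into()],
        low,
        high,
    }
}

fn main() {
    // 0x01 wants blocks 1..10, 0x02 wants blocks 5..15; the two jobs overlap on 5..10.
    let jobs = vec![job(0x01, 1, 10), job(0x02, 5, 15)];

    // Expected split: 1..5 -> [0x01], 5..10 -> [0x01, 0x02], 10..15 -> [0x02].
    let mut rearranged = rearrange(&jobs);
    rearranged.sort_by_key(|j| j.low);

    for j in &rearranged {
        println!("{}..{} -> {} address(es)", j.low, j.high, j.addresses.len());
    }
}

A similar caveat applies to configuration: the snippet below is a minimal sketch of a TOML document accepted by Config::read_from in src/config/mod.rs, inferred from the struct definitions. Every path, URL and value is a placeholder, not a copy of the repository's own ethui-indexer.toml.

// Hypothetical example: parse a minimal config the same way `Config::read_from` does.
fn parse_example_config() -> color_eyre::Result<ethui_indexer::config::Config> {
    let toml_src = r#"
        [reth]
        db = "/path/to/reth/db"
        static_files = "/path/to/reth/static_files"

        [chain]
        chain_id = 11155111            # start_block falls back to default_from_block (1)

        [sync]                         # buffer_size and backfill_concurrency use their defaults

        [http]
        port = 9500
        jwt_secret_env = "ETHUI_JWT_SECRET"

        [db]
        url = "postgres://postgres:postgres@localhost/ethui-indexer"

        [whitelist]
        file = "whitelist.txt"         # one address per line; anything after whitespace is ignored
    "#;

    Ok(toml::from_str(toml_src)?)
}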