├── .github ├── CODEOWNERS └── workflows │ ├── lint.yaml │ ├── tests.yaml │ ├── release.yaml │ ├── update-processor-sdk-version-legacy.yaml │ ├── update-processor-sdk-version.yaml │ └── update-proto-dependency.yaml ├── aptos-indexer-processors-sdk ├── sdk │ ├── src │ │ ├── types │ │ │ ├── mod.rs │ │ │ └── transaction_context.rs │ │ ├── test │ │ │ ├── mod.rs │ │ │ ├── steps │ │ │ │ ├── mod.rs │ │ │ │ └── pass_through_step.rs │ │ │ └── utils.rs │ │ ├── postgres │ │ │ ├── subconfigs │ │ │ │ ├── mod.rs │ │ │ │ └── postgres_config.rs │ │ │ ├── utils │ │ │ │ ├── mod.rs │ │ │ │ ├── checkpoint.rs │ │ │ │ └── database.rs │ │ │ ├── models │ │ │ │ ├── mod.rs │ │ │ │ ├── ledger_info.rs │ │ │ │ └── processor_status.rs │ │ │ ├── basic_processor │ │ │ │ ├── mod.rs │ │ │ │ ├── README.md │ │ │ │ ├── basic_processor_step.rs │ │ │ │ └── basic_processor_function.rs │ │ │ ├── db │ │ │ │ ├── migrations │ │ │ │ │ ├── 2025-03-06-201942_create_core_schema │ │ │ │ │ │ ├── down.sql │ │ │ │ │ │ └── up.sql │ │ │ │ │ └── 00000000000000_diesel_initial_setup │ │ │ │ │ │ ├── down.sql │ │ │ │ │ │ └── up.sql │ │ │ │ ├── diesel.toml │ │ │ │ └── processor_metadata_schema.rs │ │ │ ├── mod.rs │ │ │ ├── README.md │ │ │ └── progress.rs │ │ ├── builder │ │ │ ├── mod.rs │ │ │ └── dag.rs │ │ ├── testing_framework │ │ │ ├── mod.rs │ │ │ ├── database.rs │ │ │ ├── mock_grpc.rs │ │ │ └── cli_parser.rs │ │ ├── utils │ │ │ ├── mod.rs │ │ │ ├── constants.rs │ │ │ ├── errors.rs │ │ │ ├── README.md │ │ │ ├── chain_id_check.rs │ │ │ ├── property_map.rs │ │ │ └── convert.rs │ │ ├── traits │ │ │ ├── processor_trait.rs │ │ │ ├── into_runnable_step.rs │ │ │ ├── instrumentation.rs │ │ │ ├── mod.rs │ │ │ ├── README.md │ │ │ ├── processable.rs │ │ │ ├── runnable_step.rs │ │ │ └── async_step.rs │ │ ├── health │ │ │ ├── mod.rs │ │ │ ├── core.rs │ │ │ └── progress.rs │ │ ├── common_steps │ │ │ ├── mod.rs │ │ │ ├── timed_buffer_step.rs │ │ │ ├── arcify_step.rs │ │ │ ├── version_tracker_step.rs │ │ │ ├── order_by_version_step.rs │ │ │ └── transaction_stream_step.rs │ │ └── lib.rs │ ├── CHANGELOG.md │ └── Cargo.toml ├── transaction-stream │ ├── src │ │ ├── utils │ │ │ ├── mod.rs │ │ │ ├── additional_headers.rs │ │ │ └── time.rs │ │ ├── lib.rs │ │ └── config.rs │ └── Cargo.toml ├── instrumented-channel │ ├── Cargo.lock │ ├── Cargo.toml │ └── src │ │ ├── channel_metrics.rs │ │ └── lib.rs ├── rustfmt.toml ├── moving-average │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── sample │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── .cargo │ └── config.toml └── Cargo.toml ├── examples ├── postgres-basic-events-example │ ├── src │ │ ├── db │ │ │ ├── migrations │ │ │ │ ├── 2025-03-06-231718_create_events │ │ │ │ │ ├── down.sql │ │ │ │ │ └── up.sql │ │ │ │ └── 00000000000000_diesel_initial_setup │ │ │ │ │ ├── down.sql │ │ │ │ │ └── up.sql │ │ │ ├── diesel.toml │ │ │ └── schema.rs │ │ ├── events_model.rs │ │ └── main.rs │ ├── example-config.yaml │ ├── README.md │ └── Cargo.toml ├── rustfmt.toml ├── .cargo │ └── config.toml └── Cargo.toml ├── .gitignore ├── README.md └── scripts ├── rust_lint.sh └── check_banned_deps.sh /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @aptos-labs/ecosystem-infra @rtso -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/types/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod transaction_context; 2 | 
-------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod steps; 2 | pub mod utils; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/steps/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod pass_through_step; 2 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/subconfigs/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod postgres_config; 2 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod checkpoint; 2 | pub mod database; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod ledger_info; 2 | pub mod processor_status; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod additional_headers; 2 | pub mod time; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/builder/mod.rs: -------------------------------------------------------------------------------- 1 | mod dag; 2 | mod processor_builder; 3 | 4 | pub use processor_builder::ProcessorBuilder; 5 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cli_parser; 2 | pub mod database; 3 | mod mock_grpc; 4 | pub mod sdk_test_context; 5 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/2025-03-06-231718_create_events/down.sql: -------------------------------------------------------------------------------- 1 | -- This file should undo anything in `up.sql` 2 | DROP TABLE IF EXISTS events; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod basic_processor_function; 2 | pub mod basic_processor_step; 3 | 4 | pub use basic_processor_function::{process, run_processor}; 5 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod chain_id_check; 2 | pub mod constants; 3 | pub mod convert; 4 | pub mod errors; 5 | pub mod extract; 6 | pub mod property_map; 7 | pub mod step_metrics; 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/Cargo.lock: 
-------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "instrumented-channel" 7 | version = "0.1.0" 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/2025-03-06-201942_create_core_schema/down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS processor_metadata.processor_status; 2 | DROP TABLE IF EXISTS processor_metadata.ledger_infos; 3 | DROP SCHEMA IF EXISTS processor_metadata; -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/processor_trait.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | 3 | #[async_trait] 4 | pub trait ProcessorTrait: Send + Sync { 5 | fn name(&self) -> &'static str; 6 | async fn run_processor(&self) -> anyhow::Result<()>; 7 | } 8 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/diesel.toml: -------------------------------------------------------------------------------- 1 | # For documentation on how to configure this file, 2 | # see https://diesel.rs/guides/configuring-diesel-cli 3 | 4 | [print_schema] 5 | file = "schema.rs" 6 | 7 | [migrations_directory] 8 | dir = "migrations" 9 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod config; 2 | pub mod transaction_stream; 3 | pub mod utils; 4 | 5 | pub use aptos_transaction_filter::*; 6 | pub use config::TransactionStreamConfig; 7 | pub use transaction_stream::{TransactionStream, TransactionsPBResponse}; 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/diesel.toml: -------------------------------------------------------------------------------- 1 | # For documentation on how to configure this file, 2 | # see https://diesel.rs/guides/configuring-diesel-cli 3 | 4 | [print_schema] 5 | file = "processor_metadata_schema.rs" 6 | schema = "processor_metadata" 7 | 8 | [migrations_directory] 9 | dir = "migrations" 10 | -------------------------------------------------------------------------------- /examples/rustfmt.toml: -------------------------------------------------------------------------------- 1 | combine_control_expr = false 2 | edition = "2021" 3 | imports_granularity = "Crate" 4 | format_macro_matchers = true 5 | group_imports = "One" 6 | hex_literal_case = "Upper" 7 | match_block_trailing_comma = true 8 | newline_style = "Unix" 9 | overflow_delimited_expr = true 10 | reorder_impl_items = true 11 | use_field_init_shorthand = true 12 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/health/mod.rs: -------------------------------------------------------------------------------- 1 | //! Health checking utilities for processors. 2 | 3 | pub mod core; 4 | pub mod progress; 5 | 6 | // Re-export commonly used types.
7 | pub use core::HealthCheck; 8 | pub use progress::{ 9 | default_no_progress_threshold_secs, ProgressHealthChecker, ProgressHealthConfig, 10 | ProgressStatusProvider, 11 | }; 12 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/rustfmt.toml: -------------------------------------------------------------------------------- 1 | combine_control_expr = false 2 | edition = "2021" 3 | imports_granularity = "Crate" 4 | format_macro_matchers = true 5 | group_imports = "One" 6 | hex_literal_case = "Upper" 7 | match_block_trailing_comma = true 8 | newline_style = "Unix" 9 | overflow_delimited_expr = true 10 | reorder_impl_items = true 11 | use_field_init_shorthand = true 12 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/mod.rs: -------------------------------------------------------------------------------- 1 | use diesel_migrations::{embed_migrations, EmbeddedMigrations}; 2 | 3 | pub mod basic_processor; 4 | pub mod models; 5 | pub mod progress; 6 | pub mod subconfigs; 7 | pub mod utils; 8 | 9 | #[path = "db/processor_metadata_schema.rs"] 10 | pub mod processor_metadata_schema; 11 | 12 | pub const SDK_MIGRATIONS: EmbeddedMigrations = embed_migrations!("./src/postgres/db/migrations"); 13 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/00000000000000_diesel_initial_setup/down.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 4 | 5 | DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); 6 | DROP FUNCTION IF EXISTS diesel_set_updated_at(); 7 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/00000000000000_diesel_initial_setup/down.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 
4 | 5 | DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); 6 | DROP FUNCTION IF EXISTS diesel_set_updated_at(); 7 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/utils.rs: -------------------------------------------------------------------------------- 1 | use instrumented_channel::InstrumentedAsyncReceiver; 2 | use std::time::Duration; 3 | 4 | pub async fn receive_with_timeout( 5 | receiver: &mut InstrumentedAsyncReceiver, 6 | timeout_ms: u64, 7 | ) -> Option { 8 | tokio::time::timeout(Duration::from_millis(timeout_ms), async { 9 | receiver.recv().await 10 | }) 11 | .await 12 | .unwrap() 13 | .ok() 14 | } 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Other 2 | *.swp 3 | 4 | # Rust specific ignores 5 | # Please follow https://help.github.com/en/articles/ignoring-files to create a global 6 | # .gitignore file locally for IDE/Emacs/Vim generated files. 7 | build 8 | **/target 9 | **/*.rs.bk 10 | .idea/ 11 | 12 | # macOS Specific ignores 13 | # General 14 | .DS_Store 15 | .AppleDouble 16 | .LSOverride 17 | 18 | # VSCode settings 19 | .vscode/ 20 | 21 | # Processor config 22 | config.yaml 23 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/into_runnable_step.rs: -------------------------------------------------------------------------------- 1 | use crate::traits::{Processable, RunnableStep}; 2 | 3 | pub trait IntoRunnableStep< 4 | Input, 5 | Output, 6 | Step: Processable, 7 | RunnableType = ::RunType, 8 | > where 9 | Self: Send + Sized + 'static, 10 | Input: Send + 'static, 11 | Output: Send + 'static, 12 | { 13 | fn into_runnable_step(self) -> impl RunnableStep; 14 | } 15 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/example-config.yaml: -------------------------------------------------------------------------------- 1 | # This is a template yaml for the processor 2 | health_check_port: 8085 3 | server_config: 4 | transaction_stream_config: 5 | indexer_grpc_data_service_address: "https://grpc.mainnet.aptoslabs.com:443" 6 | auth_token: "AUTH_TOKEN" 7 | request_name_header: "events-processor" 8 | starting_version: 0 9 | postgres_config: 10 | connection_string: postgresql://postgres:@localhost:5432/example 11 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/moving-average/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-moving-average" 3 | description = "Utility to calculate moving average such as TPS" 4 | version = "0.1.0" 5 | 6 | # Workspace inherited keys 7 | authors = { workspace = true } 8 | edition = { workspace = true } 9 | homepage = { workspace = true } 10 | license = { workspace = true } 11 | publish = { workspace = true } 12 | repository = { workspace = true } 13 | rust-version = { workspace = true } 14 | 15 | [dependencies] 16 | chrono = { workspace = true } 17 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/README.md: -------------------------------------------------------------------------------- 1 | # Example Postgres events processor 2 | 3 | ## About 4 | 5 | A basic processor that 
indexes events into Postgres. It uses the `process` utility function. 6 | 7 | ## How to use 8 | 1. Install Postgres and Diesel CLI 9 | 2. Construct a `config.yaml` file. You can see `postgres-basic-events-example/example-config.yaml` as an example. 10 | 3. cd ~/aptos-indexer-processors-sdk/examples 11 | 4. cargo run -p postgres-basic-events-example -- -c postgres-basic-events-example/example-config.yaml 12 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sample/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sample" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | tracing = { workspace = true } 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/subconfigs/postgres_config.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | #[derive(Clone, Debug, Deserialize, Serialize)] 4 | #[serde(deny_unknown_fields)] 5 | pub struct PostgresConfig { 6 | pub connection_string: String, 7 | // Size of the pool for writes/reads to the DB. Limits maximum number of queries in flight 8 | #[serde(default = "PostgresConfig::default_db_pool_size")] 9 | pub db_pool_size: u32, 10 | } 11 | 12 | impl PostgresConfig { 13 | pub const fn default_db_pool_size() -> u32 { 14 | 150 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/2025-03-06-231718_create_events/up.sql: -------------------------------------------------------------------------------- 1 | -- Your SQL goes here 2 | CREATE TABLE events ( 3 | sequence_number BIGINT NOT NULL, 4 | creation_number BIGINT NOT NULL, 5 | account_address VARCHAR(66) NOT NULL, 6 | transaction_version BIGINT NOT NULL, 7 | transaction_block_height BIGINT NOT NULL, 8 | type TEXT NOT NULL, 9 | data JSONB NOT NULL, 10 | inserted_at TIMESTAMP NOT NULL DEFAULT NOW(), 11 | event_index BIGINT NOT NULL, 12 | indexed_type VARCHAR(300) NOT NULL, 13 | PRIMARY KEY (transaction_version, event_index) 14 | ); -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/2025-03-06-201942_create_core_schema/up.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS processor_metadata; 2 | 3 | -- Tracks latest processed version per processor 4 | CREATE TABLE IF NOT EXISTS processor_metadata.processor_status ( 5 | processor VARCHAR(100) UNIQUE PRIMARY KEY NOT NULL, 6 | last_success_version BIGINT NOT NULL, 7 | last_updated TIMESTAMP NOT NULL DEFAULT NOW(), 8 | last_transaction_timestamp TIMESTAMP NULL 9 | ); 10 | 11 | -- Tracks chain id 12 | CREATE TABLE IF NOT EXISTS processor_metadata.ledger_infos (chain_id BIGINT UNIQUE PRIMARY KEY NOT NULL); 13 | --------------------------------------------------------------------------------
/aptos-indexer-processors-sdk/sdk/src/utils/constants.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Constant values useful for indexing. 5 | 6 | use once_cell::sync::Lazy; 7 | 8 | /// Type string for AptosCoin. 9 | pub const APTOS_COIN_TYPE_STR: &str = "0x1::aptos_coin::AptosCoin"; 10 | 11 | pub static APT_METADATA_ADDRESS_RAW: Lazy<[u8; 32]> = Lazy::new(|| { 12 | let mut addr = [0u8; 32]; 13 | addr[31] = 10u8; 14 | addr 15 | }); 16 | 17 | pub static APT_METADATA_ADDRESS_HEX: Lazy<String> = 18 | Lazy::new(|| format!("0x{}", hex::encode(*APT_METADATA_ADDRESS_RAW))); 19 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/health/core.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | 3 | /// A trait for implementing custom health checks. 4 | /// 5 | /// Implementations can be passed to `register_probes_and_metrics_handler` to add 6 | /// custom health checks to the `/healthz` endpoint. 7 | #[async_trait] 8 | pub trait HealthCheck: Send + Sync { 9 | /// Returns the name of this health check (used in error messages). 10 | fn name(&self) -> &str; 11 | 12 | /// Check if this component is healthy. 13 | /// Returns `Ok(())` if healthy, or `Err(reason)` if not healthy. 14 | async fn is_healthy(&self) -> Result<(), String>; 15 | } 16 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | #[derive(Error, Debug)] 4 | pub enum ProcessorError { 5 | #[error("Step Init Error: {message}")] 6 | StepInitError { message: String }, 7 | #[error("Process Error: {message}")] 8 | ProcessError { message: String }, 9 | #[error("Poll Error: {message}")] 10 | PollError { message: String }, 11 | #[error("DB Store Error: {message}, Query: {query:?}")] 12 | DBStoreError { 13 | message: String, 14 | query: Option<String>, 15 | }, 16 | #[error("Chain ID Check Error: {message}")] 17 | ChainIdCheckError { message: String }, 18 | } 19 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/instrumentation.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | pub trait NamedStep { 4 | fn name(&self) -> String; 5 | 6 | fn type_name(&self) -> String { 7 | std::any::type_name::<Self>().to_string() 8 | } 9 | } 10 | 11 | pub struct StepInstrumentor<Step> 12 | where 13 | Step: NamedStep + Send + Sized + 'static, 14 | { 15 | _step: PhantomData<Step>, 16 | } 17 | 18 | impl<Step> Default for StepInstrumentor<Step> 19 | where 20 | Step: NamedStep + Send + Sized + 'static, 21 | { 22 | fn default() -> Self { 23 | Self { 24 | _step: Default::default(), 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod async_step; 2 | pub mod instrumentation; 3 | pub mod into_runnable_step; 4 | pub mod pollable_async_step; 5 | pub mod processable; 6 | pub mod processor_trait; 7 | pub mod runnable_step; 8 | 9 | // Re-export the structs and traits 10 | pub use async_step::{AsyncRunType, AsyncStep,
RunnableAsyncStep}; 11 | pub use instrumentation::NamedStep; 12 | pub use into_runnable_step::IntoRunnableStep; 13 | pub use pollable_async_step::{PollableAsyncRunType, PollableAsyncStep, RunnablePollableStep}; 14 | pub use processable::{Processable, RunnableStepType}; 15 | pub use runnable_step::{RunnableStep, RunnableStepWithInputReceiver}; 16 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod arcify_step; 2 | pub mod order_by_version_step; 3 | pub mod timed_buffer_step; 4 | pub mod transaction_stream_step; 5 | pub mod version_tracker_step; 6 | pub mod write_rate_limit_step; 7 | 8 | // Re-export the steps 9 | pub use arcify_step::ArcifyStep; 10 | pub use order_by_version_step::OrderByVersionStep; 11 | pub use timed_buffer_step::TimedBufferStep; 12 | pub use transaction_stream_step::TransactionStreamStep; 13 | pub use version_tracker_step::{ 14 | ProcessorStatusSaver, VersionTrackerStep, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS, 15 | }; 16 | pub use write_rate_limit_step::{Sizeable, WriteRateLimitConfig, WriteRateLimitStep}; 17 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/schema.rs: -------------------------------------------------------------------------------- 1 | // @generated automatically by Diesel CLI. 2 | 3 | diesel::table! { 4 | events (transaction_version, event_index) { 5 | sequence_number -> Int8, 6 | creation_number -> Int8, 7 | #[max_length = 66] 8 | account_address -> Varchar, 9 | transaction_version -> Int8, 10 | transaction_block_height -> Int8, 11 | #[sql_name = "type"] 12 | type_ -> Text, 13 | data -> Jsonb, 14 | inserted_at -> Timestamp, 15 | event_index -> Int8, 16 | #[max_length = 300] 17 | indexed_type -> Varchar, 18 | } 19 | } 20 | 21 | diesel::allow_tables_to_appear_in_same_query!(events,); 22 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "instrumented-channel" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | delegate = { workspace = true } 16 | derive_builder = { workspace = true } 17 | kanal = { workspace = true } 18 | once_cell = { workspace = true } 19 | prometheus = { workspace = true } 20 | prometheus-client = { workspace = true } 21 | 22 | [dev-dependencies] 23 | tokio = { workspace = true } 24 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/processor_metadata_schema.rs: -------------------------------------------------------------------------------- 1 | // @generated automatically by Diesel CLI. 2 | 3 | pub mod processor_metadata { 4 | diesel::table! { 5 | processor_metadata.ledger_infos (chain_id) { 6 | chain_id -> Int8, 7 | } 8 | } 9 | 10 | diesel::table! 
{ 11 | processor_metadata.processor_status (processor) { 12 | #[max_length = 100] 13 | processor -> Varchar, 14 | last_success_version -> Int8, 15 | last_updated -> Timestamp, 16 | last_transaction_timestamp -> Nullable<Timestamp>, 17 | } 18 | } 19 | 20 | diesel::allow_tables_to_appear_in_same_query!(ledger_infos, processor_status,); 21 | } 22 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/README.md: -------------------------------------------------------------------------------- 1 | # Traits 2 | 3 | ## Async Step 4 | 5 | The `async_step.rs` file provides tools for handling asynchronous steps in processing. 6 | 7 | Implement `AsyncStep` for steps that process data directly without buffering. 8 | 9 | ## Pollable Async Step 10 | 11 | The `pollable_async_step.rs` file provides tools for handling steps that can be polled asynchronously. 12 | 13 | Implement `PollableAsyncStep` for steps that buffer or poll data over a duration of time in an asynchronous manner. 14 | 15 | ## Processable 16 | The `processable.rs` file defines the `Processable` trait, which each step implements. 17 | 18 | ## Processor trait 19 | The `processor_trait.rs` file defines `ProcessorTrait`, which each processor implements. 20 | 21 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Aptos Indexer SDK Changelog 2 | 3 | All notable changes to the Aptos Indexer SDK will be captured in this file. This changelog is written by hand for now. It adheres to the format set out by [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 4 | 5 | ## Unreleased 6 | 7 | ## 0.2.0 (2025-12-09) 8 | 9 | - Renamed `/readiness` endpoint to `/healthz`. 10 | - Added support for the health endpoint for checking that processors are making forward progress. 11 | - `ServerArgs::run` now supports additional health checks via the new `run_with_health_checks` method. 12 | - **Breaking**: `run_server_with_config` requires an additional `health_checks` argument now. 13 | 14 | ## 0.1.0 15 | 16 | - Initial version at start of changelog. 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ## Chain ID Check 4 | 5 | The `chain_id_check.rs` file provides tools to manage and verify the chain ID during processing. It helps to ensure the processor is indexing the correct chain ID. 6 | 7 | ### ChainIdChecker Trait 8 | 9 | This trait has two main functions that need to be implemented: 10 | 11 | - `save_chain_id`: Saves the current chain ID to storage. 12 | - `get_chain_id`: Retrieves the chain ID from storage. 13 | 14 | 15 | ### `check_or_update_chain_id` Function 16 | 17 | This function checks if the chain ID from a `TransactionStream` matches the one in storage. If they match, processing continues. If not, it updates the storage with the new chain ID. This helps prevent processing errors due to mismatched chain IDs. 18 | 19 | Use this function in your processor to manage the chain ID.
20 | 21 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/README.md: -------------------------------------------------------------------------------- 1 | # Postgres crate 2 | 3 | ## About 4 | This crate provides a Postgres implementation for the integration layer between the Indexer SDK and Postgres. Features included are tracking the last processed version, retrieving the start version, and validating the chain id. The key components of this crate are core schema and models, Diesel utility functions, and trait implementations. 5 | 6 | ## How to use 7 | 1. Install Postgres and Diesel CLI 8 | 2. Add the `aptos-indexer-processor-sdk` crate with the `postgres_full` feature in the `[dependencies]` section of your `Cargo.toml`: 9 | ``` 10 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}", features = ["postgres_full"] } 11 | ``` 12 | 3. Copy the `src/db` folder into where you are managing your Diesel migrations. -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "postgres-basic-events-example" 3 | version = "1.0.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | anyhow = { workspace = true } 16 | aptos-indexer-processor-sdk = { workspace = true, features = ["postgres_full"] } 17 | async-trait = { workspace = true } 18 | clap = { workspace = true } 19 | diesel = { workspace = true } 20 | diesel_migrations = { workspace = true } 21 | field_count = { workspace = true } 22 | rayon = { workspace = true } 23 | serde = { workspace = true } 24 | serde_json = { workspace = true } 25 | tokio = { workspace = true } 26 | tracing = { workspace = true } 27 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/ledger_info.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::postgres::{ 7 | processor_metadata_schema::processor_metadata::ledger_infos, utils::database::DbPoolConnection, 8 | }; 9 | use diesel::{Identifiable, Insertable, OptionalExtension, QueryDsl, Queryable}; 10 | use diesel_async::RunQueryDsl; 11 | 12 | #[derive(Debug, Identifiable, Insertable, Queryable)] 13 | #[diesel(table_name = ledger_infos)] 14 | #[diesel(primary_key(chain_id))] 15 | pub struct LedgerInfo { 16 | pub chain_id: i64, 17 | } 18 | 19 | impl LedgerInfo { 20 | pub async fn get(conn: &mut DbPoolConnection<'_>) -> diesel::QueryResult<Option<Self>> { 21 | ledger_infos::table 22 | .select(ledger_infos::all_columns) 23 | .first::<Self>(conn) 24 | .await 25 | .optional() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-indexer-transaction-stream" 3 | version = "0.1.0"
4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | anyhow = { workspace = true } 17 | aptos-moving-average = { workspace = true } 18 | aptos-protos = { workspace = true } 19 | aptos-transaction-filter = { workspace = true } 20 | chrono = { workspace = true } 21 | futures-util = { workspace = true } 22 | once_cell = { workspace = true } 23 | prometheus = { workspace = true } 24 | prost = { workspace = true } 25 | sample = { workspace = true } 26 | serde = { workspace = true } 27 | tokio = { workspace = true } 28 | tonic = { workspace = true } 29 | tracing = { workspace = true } 30 | url = { workspace = true } 31 | -------------------------------------------------------------------------------- /examples/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xclippy = [ 3 | "clippy", 4 | "--workspace", 5 | "--all-targets", 6 | "--", 7 | "-Dwarnings", 8 | "-Wclippy::all", 9 | "-Aclippy::upper_case_acronyms", 10 | "-Aclippy::enum-variant-names", 11 | "-Aclippy::result-large-err", 12 | "-Aclippy::mutable-key-type", 13 | "-Aclippy::map_identity", # We temporarily ignore this due to: https://github.com/rust-lang/rust-clippy/issues/11764 14 | ] 15 | 16 | [build] 17 | rustflags = [ 18 | "--cfg", 19 | "tokio_unstable", 20 | "-C", 21 | "force-frame-pointers=yes", 22 | "-C", 23 | "force-unwind-tables=yes", 24 | ] 25 | 26 | # TODO(grao): Figure out whether we should enable other cpu features, and whether we should use a different way to configure them rather than list every single one here. 27 | #[target.x86_64-unknown-linux-gnu] 28 | #rustflags = ["--cfg", "tokio_unstable", "-C", "link-arg=-fuse-ld=lld", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "target-feature=+sse4.2"] 29 | 30 | # 64 bit MSVC 31 | #[target.x86_64-pc-windows-msvc] 32 | #rustflags = ["--cfg", "tokio_unstable", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "link-arg=/STACK:8000000" # Set stack to 8 MB] 33 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xclippy = [ 3 | "clippy", 4 | "--workspace", 5 | "--all-targets", 6 | "--all-features", 7 | "--", 8 | "-Dwarnings", 9 | "-Wclippy::all", 10 | "-Aclippy::upper_case_acronyms", 11 | "-Aclippy::enum-variant-names", 12 | "-Aclippy::result-large-err", 13 | "-Aclippy::mutable-key-type", 14 | "-Aclippy::map_identity", # We temporarily ignore this due to: https://github.com/rust-lang/rust-clippy/issues/11764 15 | ] 16 | 17 | [build] 18 | rustflags = [ 19 | "--cfg", 20 | "tokio_unstable", 21 | "-C", 22 | "force-frame-pointers=yes", 23 | "-C", 24 | "force-unwind-tables=yes", 25 | ] 26 | 27 | # TODO(grao): Figure out whether we should enable other cpu features, and whether we should use a different way to configure them rather than list every single one here. 
28 | #[target.x86_64-unknown-linux-gnu] 29 | #rustflags = ["--cfg", "tokio_unstable", "-C", "link-arg=-fuse-ld=lld", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "target-feature=+sse4.2"] 30 | 31 | # 64 bit MSVC 32 | #[target.x86_64-pc-windows-msvc] 33 | #rustflags = ["--cfg", "tokio_unstable", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "link-arg=/STACK:8000000" # Set stack to 8 MB] 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Aptos Indexer SDK 2 | Generally, an indexer processor follow this flow: 3 | 4 | 1. Receive a stream of Aptos transactions 5 | 2. Extract data from the transactions 6 | 3. Transform and merge the parsed data into a coherent, standardized schema 7 | 4. Store the transformed data into a database 8 | 9 | The Aptos Indexer SDK works by modeling each processor as a graph of independent steps. Each of the steps in the flow above is written as a `Step` in the SDK, and the output of each `Step` is connected to the input of the next `Step` by a channel. 10 | 11 | # How to use 12 | 13 | To your `Cargo.toml` , add 14 | 15 | ```yaml 16 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}" } 17 | aptos-indexer-processor-sdk-server-framework = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}" } 18 | ``` 19 | 20 | # Get started 21 | 22 | We’ve created a [Quickstart Guide to Aptos Indexer SDK](https://aptos.dev/build/indexer/indexer-sdk/quickstart) which gets you setup and running an events processor that indexes events on the Aptos blockchain. 23 | 24 | # Documentation 25 | Full documentation can be found [here](https://aptos.dev/build/indexer/indexer-sdk/documentation) 26 | 27 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/processable.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::NamedStep, types::transaction_context::TransactionContext, 3 | utils::errors::ProcessorError, 4 | }; 5 | use anyhow::Result; 6 | use async_trait::async_trait; 7 | 8 | /// Trait to convince the compiler that different step types are mutually exclusive 9 | pub trait RunnableStepType {} 10 | 11 | // This is a dummy implementation for the unit type 12 | impl RunnableStepType for () {} 13 | 14 | #[async_trait] 15 | pub trait Processable 16 | where 17 | Self: NamedStep + Send + Sized + 'static, 18 | { 19 | type Input: Send + 'static; 20 | type Output: Send + 'static; 21 | // This is to convince the compiler of mutual exclusivity of different step impls 22 | type RunType: RunnableStepType; 23 | 24 | /// Lifecycle methods 25 | async fn init(&mut self) {} 26 | async fn cleanup( 27 | &mut self, 28 | ) -> Result>>, ProcessorError> { 29 | Ok(None) 30 | } 31 | 32 | /// Processes a batch of input items and returns a batch of output items. 
33 | async fn process( 34 | &mut self, 35 | items: TransactionContext, 36 | ) -> Result>, ProcessorError>; 37 | } 38 | -------------------------------------------------------------------------------- /examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | 4 | members = ["postgres-basic-events-example"] 5 | 6 | [workspace.package] 7 | authors = ["Aptos Labs "] 8 | edition = "2021" 9 | homepage = "https://aptoslabs.com" 10 | license = "Apache-2.0" 11 | publish = false 12 | repository = "https://github.com/aptos-labs/aptos-indexer-processor-sdk" 13 | rust-version = "1.78" 14 | 15 | [workspace.dependencies] 16 | postgres-basic-events-example = { path = "postgres-basic-events-example" } 17 | 18 | aptos-indexer-processor-sdk = { path = "../aptos-indexer-processors-sdk/sdk", features = [ 19 | "postgres_full", 20 | ] } 21 | anyhow = "1.0.86" 22 | async-trait = "0.1.80" 23 | clap = { version = "4.3.5", features = ["derive", "unstable-styles"] } 24 | diesel = { version = "~2.3", features = [ 25 | "chrono", 26 | "postgres_backend", 27 | "numeric", 28 | "serde_json", 29 | ] } 30 | diesel_migrations = { version = "2.1.0", features = ["postgres"] } 31 | field_count = "0.1.1" 32 | rayon = "1.10.0" 33 | sample = { path = "../aptos-indexer-processors-sdk/sample" } 34 | serde = { version = "1.0.193", features = ["derive", "rc"] } 35 | serde_json = { version = "1.0.81", features = ["preserve_order"] } 36 | tokio = { version = "1.37.0", features = ["full"] } 37 | tracing = "0.1.34" 38 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/builder/dag.rs: -------------------------------------------------------------------------------- 1 | use crate::traits::{RunnableStep, RunnableStepWithInputReceiver}; 2 | use tokio::task::JoinHandle; 3 | 4 | pub fn connect_two_steps( 5 | left_step: RunnableStepWithInputReceiver, 6 | right_step: RightStep, 7 | channel_size: usize, 8 | ) -> ( 9 | JoinHandle<()>, 10 | RunnableStepWithInputReceiver, 11 | ) 12 | where 13 | LeftInput: Send + 'static, 14 | LeftOutput: Send + 'static, 15 | RightOutput: Send + 'static, 16 | LeftStep: RunnableStep + Send + Sized + 'static, 17 | RightStep: RunnableStep + Send + Sized + 'static, 18 | { 19 | let RunnableStepWithInputReceiver { 20 | input_receiver: left_input_receiver, 21 | _input_sender: _left_input_sender, 22 | step: left_step, 23 | .. 24 | } = left_step; 25 | 26 | let (left_output_receiver, left_handle) = left_step.spawn( 27 | Some(left_input_receiver.clone()), 28 | channel_size, 29 | _left_input_sender, 30 | ); 31 | 32 | let right_step_with_input_receiver = 33 | RunnableStepWithInputReceiver::new(left_output_receiver, right_step); 34 | 35 | (left_handle, right_step_with_input_receiver) 36 | } 37 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/00000000000000_diesel_initial_setup/up.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 
4 | 5 | 6 | 7 | 8 | -- Sets up a trigger for the given table to automatically set a column called 9 | -- `updated_at` whenever the row is modified (unless `updated_at` was included 10 | -- in the modified columns) 11 | -- 12 | -- # Example 13 | -- 14 | -- ```sql 15 | -- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); 16 | -- 17 | -- SELECT diesel_manage_updated_at('users'); 18 | -- ``` 19 | CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ 20 | BEGIN 21 | EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s 22 | FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); 23 | END; 24 | $$ LANGUAGE plpgsql; 25 | 26 | CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ 27 | BEGIN 28 | IF ( 29 | NEW IS DISTINCT FROM OLD AND 30 | NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at 31 | ) THEN 32 | NEW.updated_at := current_timestamp; 33 | END IF; 34 | RETURN NEW; 35 | END; 36 | $$ LANGUAGE plpgsql; 37 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/00000000000000_diesel_initial_setup/up.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 4 | 5 | 6 | 7 | 8 | -- Sets up a trigger for the given table to automatically set a column called 9 | -- `updated_at` whenever the row is modified (unless `updated_at` was included 10 | -- in the modified columns) 11 | -- 12 | -- # Example 13 | -- 14 | -- ```sql 15 | -- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); 16 | -- 17 | -- SELECT diesel_manage_updated_at('users'); 18 | -- ``` 19 | CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ 20 | BEGIN 21 | EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s 22 | FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); 23 | END; 24 | $$ LANGUAGE plpgsql; 25 | 26 | CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ 27 | BEGIN 28 | IF ( 29 | NEW IS DISTINCT FROM OLD AND 30 | NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at 31 | ) THEN 32 | NEW.updated_at := current_timestamp; 33 | END IF; 34 | RETURN NEW; 35 | END; 36 | $$ LANGUAGE plpgsql; 37 | -------------------------------------------------------------------------------- /scripts/rust_lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script runs linting for both the examples and aptos-indexer-processors-sdk directories 4 | # It assumes you have cargo-sort installed. If not, you can install it with: 5 | # cargo install cargo-sort 6 | 7 | set -e 8 | 9 | # Function to run linting in a directory 10 | run_lint() { 11 | local dir=$1 12 | echo "Running lint in $dir..." 13 | 14 | # Change to the directory 15 | cd "$dir" 16 | 17 | # Run in check mode if requested 18 | CHECK_ARG="" 19 | if [ "$1" = "--check" ]; then 20 | CHECK_ARG="--check" 21 | fi 22 | 23 | # Run the linting commands 24 | cargo +nightly xclippy 25 | cargo +nightly fmt $CHECK_ARG 26 | cargo sort --grouped --workspace $CHECK_ARG 27 | 28 | # Return to the original directory 29 | cd .. 30 | } 31 | 32 | # Make sure we're in the root directory 33 | if [ ! 
-d "examples" ] || [ ! -d "aptos-indexer-processors-sdk" ]; then 34 | echo "Please run this script from the root directory of the project" 35 | exit 1 36 | fi 37 | 38 | # Run linting for both directories 39 | echo "Starting linting process..." 40 | 41 | echo "\nLinting examples directory..." 42 | run_lint "examples" 43 | 44 | echo "\nLinting aptos-indexer-processors-sdk directory..." 45 | run_lint "aptos-indexer-processors-sdk" 46 | 47 | echo "\nLinting completed successfully!" -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/processor_status.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::postgres::{ 7 | processor_metadata_schema::processor_metadata::processor_status, 8 | utils::database::DbPoolConnection, 9 | }; 10 | use diesel::{AsChangeset, ExpressionMethods, Insertable, OptionalExtension, QueryDsl, Queryable}; 11 | use diesel_async::RunQueryDsl; 12 | 13 | #[derive(AsChangeset, Debug, Insertable)] 14 | #[diesel(table_name = processor_status)] 15 | /// Only tracking the latest version successfully processed 16 | pub struct ProcessorStatus { 17 | pub processor: String, 18 | pub last_success_version: i64, 19 | pub last_transaction_timestamp: Option, 20 | } 21 | 22 | #[derive(AsChangeset, Debug, Queryable)] 23 | #[diesel(table_name = processor_status)] 24 | /// Only tracking the latest version successfully processed 25 | pub struct ProcessorStatusQuery { 26 | pub processor: String, 27 | pub last_success_version: i64, 28 | pub last_updated: chrono::NaiveDateTime, 29 | pub last_transaction_timestamp: Option, 30 | } 31 | 32 | impl ProcessorStatusQuery { 33 | pub async fn get_by_processor( 34 | processor_name: &str, 35 | conn: &mut DbPoolConnection<'_>, 36 | ) -> diesel::QueryResult> { 37 | processor_status::table 38 | .filter(processor_status::processor.eq(processor_name)) 39 | .first::(conn) 40 | .await 41 | .optional() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/progress.rs: -------------------------------------------------------------------------------- 1 | //! Postgres-specific progress health checking. 2 | //! 3 | //! This module provides `PostgresProgressStatusProvider`, which implements the 4 | //! `ProgressStatusProvider` trait for postgres-backed processors. 5 | 6 | use crate::{ 7 | health::ProgressStatusProvider, 8 | postgres::{models::processor_status::ProcessorStatusQuery, utils::database::ArcDbPool}, 9 | }; 10 | use async_trait::async_trait; 11 | use chrono::NaiveDateTime; 12 | 13 | /// A postgres-backed implementation of `ProgressStatusProvider`. 14 | /// 15 | /// This queries the `processor_status` table to get the last updated timestamp. 
16 | pub struct PostgresProgressStatusProvider { 17 | processor_name: String, 18 | db_pool: ArcDbPool, 19 | } 20 | 21 | impl PostgresProgressStatusProvider { 22 | pub fn new(processor_name: String, db_pool: ArcDbPool) -> Self { 23 | Self { 24 | processor_name, 25 | db_pool, 26 | } 27 | } 28 | } 29 | 30 | #[async_trait] 31 | impl ProgressStatusProvider for PostgresProgressStatusProvider { 32 | async fn get_last_updated(&self) -> Result, String> { 33 | let mut conn = self 34 | .db_pool 35 | .get() 36 | .await 37 | .map_err(|e| format!("Failed to get DB connection: {}", e))?; 38 | 39 | let status = ProcessorStatusQuery::get_by_processor(&self.processor_name, &mut conn) 40 | .await 41 | .map_err(|e| format!("Failed to query processor status: {}", e))?; 42 | 43 | Ok(status.map(|s| s.last_updated)) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/steps/pass_through_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{async_step::AsyncRunType, AsyncStep, NamedStep, Processable}, 3 | types::transaction_context::TransactionContext, 4 | utils::errors::ProcessorError, 5 | }; 6 | use anyhow::Result; 7 | use async_trait::async_trait; 8 | use std::marker::PhantomData; 9 | 10 | pub struct PassThroughStep { 11 | name: Option, 12 | _input: PhantomData, 13 | } 14 | 15 | impl Default for PassThroughStep { 16 | fn default() -> Self { 17 | Self { 18 | name: None, 19 | _input: PhantomData, 20 | } 21 | } 22 | } 23 | 24 | impl PassThroughStep { 25 | pub fn new_named(name: String) -> Self { 26 | Self { 27 | name: Some(name), 28 | _input: PhantomData, 29 | } 30 | } 31 | } 32 | 33 | impl AsyncStep for PassThroughStep {} 34 | 35 | impl NamedStep for PassThroughStep { 36 | fn name(&self) -> String { 37 | self.name 38 | .clone() 39 | .unwrap_or_else(|| "PassThroughStep".to_string()) 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Processable for PassThroughStep { 45 | type Input = Input; 46 | type Output = Input; 47 | type RunType = AsyncRunType; 48 | 49 | async fn process( 50 | &mut self, 51 | item: TransactionContext, 52 | ) -> Result>, ProcessorError> { 53 | Ok(Some(item)) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: "Lint" 2 | on: 3 | # Allow us to run this specific workflow without a PR 4 | workflow_dispatch: 5 | pull_request: 6 | push: 7 | branches: 8 | - main 9 | 10 | # cancel redundant builds 11 | concurrency: 12 | # for push and workflow_dispatch events we use `github.sha` in the concurrency group and don't really cancel each other out/limit concurrency 13 | # for pull_request events newer jobs cancel earlier jobs to save on CI etc. 
14 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.sha || github.head_ref || github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | Lint: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - name: Install Dependencies (aptos-indexer-processors-sdkl) 24 | run: | 25 | sudo apt update && sudo apt install libdw-dev 26 | cargo install cargo-sort 27 | rustup update 28 | rustup toolchain install nightly 29 | rustup component add clippy --toolchain nightly 30 | rustup component add rustfmt --toolchain nightly 31 | working-directory: aptos-indexer-processors-sdk 32 | 33 | - name: Install Dependencies (examples) 34 | run: | 35 | sudo apt update && sudo apt install libdw-dev 36 | cargo install cargo-sort 37 | rustup update 38 | rustup toolchain install nightly 39 | rustup component add clippy --toolchain nightly 40 | rustup component add rustfmt --toolchain nightly 41 | working-directory: examples 42 | 43 | - name: Run Linter 44 | run: | 45 | bash scripts/rust_lint.sh --check 46 | 47 | - name: Check Banned Dependencies 48 | run: bash scripts/check_banned_deps.sh 49 | -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: "Tests" 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.sha || github.head_ref || github.ref }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | Test: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Install Dependencies 20 | run: | 21 | sudo apt update && sudo apt install libdw-dev 22 | cargo install cargo-sort 23 | rustup update 24 | rustup toolchain install nightly 25 | working-directory: aptos-indexer-processors-sdk 26 | 27 | - name: Build with No Default Features 28 | run: cargo build --no-default-features 29 | working-directory: aptos-indexer-processors-sdk 30 | 31 | - name: Run Tests 32 | id: tests 33 | continue-on-error: true # Allow workflow to continue if tests fail 34 | run: cargo test 35 | working-directory: aptos-indexer-processors-sdk 36 | 37 | - name: Notify Eco Infra Oncall about proto update failure 38 | if: | 39 | steps.tests.outcome == 'failure' && 40 | github.event_name == 'pull_request' && 41 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 42 | uses: slackapi/slack-github-action@v1.24.0 43 | with: 44 | # eco-infra-oncall channel. 45 | channel-id: 'C0468USBLQJ' 46 | slack-message: | 47 | :warning: Tests failed on PR with indexer-sdk-update label 48 | PR: ${{ github.event.pull_request.html_url }} 49 | Author: ${{ github.event.pull_request.user.login }} 50 | Title: ${{ github.event.pull_request.title }} 51 | env: 52 | SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} 53 | -------------------------------------------------------------------------------- /scripts/check_banned_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script checks if the crates in both examples and aptos-indexer-processors-sdk 4 | # depend on external deps that they shouldn't. 
We run this in CI to make sure we don't 5 | # accidentally reintroduce deps that would make the crates unusable for the CLI. 6 | # 7 | # While it would be more reliable to actually build the crate and check what libraries 8 | # it links to, e.g. with otool, it is much cheaper to use cargo tree. As far as I can 9 | # tell the entire Rust ecosystem makes use of these `x-sys` libraries to depend on 10 | # external dynamically linked libraries. 11 | # 12 | # Run this from the root directory of the project. 13 | 14 | # Make sure we're in the root directory 15 | if [ ! -d "examples" ] || [ ! -d "aptos-indexer-processors-sdk" ]; then 16 | echo "Please run this script from the root directory of the project" 17 | exit 1 18 | fi 19 | 20 | # We only run the check on the SDK since that's the only crate used by the CLI. 21 | cd "aptos-indexer-processors-sdk" 22 | 23 | declare -a deps=("pq-sys" "openssl-sys") 24 | 25 | for dep in "${deps[@]}"; do 26 | echo "Checking for banned dependency $dep..." 27 | 28 | # Check for deps. As you can see, we only check for MacOS right now. 29 | # We specify --features postgres_partial because we only care about these banned deps 30 | # for the local testnet use case, in which case it uses only a subset of the 31 | # postgres features that don't include pq-sys. 32 | out=`cargo tree --features postgres_partial -e features,no-build,no-dev --target aarch64-apple-darwin -i "$dep"` 33 | 34 | # If the exit status was non-zero, great, the dep couldn't be found. 35 | if [ $? -ne 0 ]; then 36 | continue 37 | fi 38 | 39 | # If the exit status was zero we have to check the output to see if the dep is in 40 | # use. If it is in the output, it is in use. 41 | if [[ $out != *"$dep"* ]]; then 42 | continue 43 | fi 44 | 45 | echo "Banned dependency $dep found in $dir!" 46 | cd ../.. 47 | exit 1 48 | done 49 | 50 | echo "None of the banned dependencies are in use in $dir, great!" 51 | 52 | exit 0 -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/README.md: -------------------------------------------------------------------------------- 1 | # Custom processor function 2 | 3 | Utility function that lets you create a Postgres processor. It works by running the code in `run_processor` method and applying a `process_function` on each transaction. 4 | 5 | ## How to use 6 | 1. Install Postgres and Diesel CLI 7 | 2. Add the `aptos-indexer-processor-sdk` crate with the `postgres_full` feature in the `[dependencies]` section of your `Config.toml`: 8 | ``` 9 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}", features = ["postgres_full"] } 10 | ``` 11 | 3. Setup Diesel and define your DB migrations. 12 | 4. In `main.rs`, call the `process` function with your indexing logic. You'll need to implement this part: 13 | ``` 14 | const MIGRATIONS: EmbeddedMigrations = embed_migrations!("/path/to/src/db/migrations"); 15 | process( 16 | "processor_name".to_string(), 17 | MIGRATIONS, 18 | async |transactions, conn_pool| { 19 | // Implement your indexing logic 20 | }, 21 | ) 22 | .await?; 23 | ``` 24 | The `process` function is an abstraction around a regular SDK processor. 25 | 26 | It runs your db migrations, validates the chain id, connects to Transaction Stream, tracks the last successful version, and processes transactions using your custom indexing logic. 
27 | 28 | See [`postgres-basic-events-example`](https://github.com/aptos-labs/aptos-indexer-processor-sdk/tree/main/examples/postgres-basic-events-example) for an example on how to use this function to create a simple processor that writes events to Postgres. 29 | 30 | 5. Construct a `config.yaml` file with this example: 31 | ``` 32 | # This is a template yaml for the processor 33 | health_check_port: 8085 34 | server_config: 35 | transaction_stream_config: 36 | indexer_grpc_data_service_address: "https://grpc.mainnet.aptoslabs.com:443" 37 | auth_token: "AUTH_TOKEN" 38 | request_name_header: "PROCESSOR_NAME" 39 | starting_version: 0 40 | postgres_config: 41 | connection_string: postgresql://postgres:@localhost:5432/example 42 | ``` 43 | 6. Run processor using this command `cargo run -p postgres-basic-events-example -- -c /path/to/config.yaml` 44 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/types/transaction_context.rs: -------------------------------------------------------------------------------- 1 | use aptos_indexer_transaction_stream::utils::time::{ 2 | time_diff_since_pb_timestamp_in_secs, timestamp_to_unixtime, 3 | }; 4 | 5 | /// Contains processed data and associated transaction metadata. 6 | /// 7 | /// The processed data is extracted from transactions and the 8 | /// TransactionContext contains additional metadata about which transactions the extracted 9 | /// data originated from. The metadata is used for metrics and logging purposes. 10 | #[derive(Clone, Default)] 11 | pub struct TransactionContext { 12 | pub data: T, 13 | pub metadata: TransactionMetadata, 14 | } 15 | 16 | impl TransactionContext { 17 | pub fn get_num_transactions(&self) -> u64 { 18 | self.metadata.end_version - self.metadata.start_version + 1 19 | } 20 | 21 | pub fn get_start_transaction_timestamp_unix(&self) -> Option { 22 | self.metadata 23 | .start_transaction_timestamp 24 | .as_ref() 25 | .map(timestamp_to_unixtime) 26 | } 27 | 28 | pub fn get_transaction_latency(&self) -> Option { 29 | self.metadata 30 | .start_transaction_timestamp 31 | .as_ref() 32 | .map(time_diff_since_pb_timestamp_in_secs) 33 | } 34 | } 35 | 36 | impl Ord for TransactionContext { 37 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 38 | self.metadata 39 | .start_version 40 | .cmp(&other.metadata.start_version) 41 | } 42 | } 43 | 44 | impl PartialOrd for TransactionContext { 45 | fn partial_cmp(&self, other: &Self) -> Option { 46 | Some(self.cmp(other)) 47 | } 48 | } 49 | 50 | impl Eq for TransactionContext {} 51 | 52 | impl PartialEq for TransactionContext { 53 | fn eq(&self, other: &Self) -> bool { 54 | self.metadata.start_version == other.metadata.start_version 55 | } 56 | } 57 | 58 | // Metadata about a batch of transactions 59 | #[derive(Clone, Default)] 60 | pub struct TransactionMetadata { 61 | pub start_version: u64, 62 | pub end_version: u64, 63 | pub start_transaction_timestamp: Option, 64 | pub end_transaction_timestamp: Option, 65 | pub total_size_in_bytes: u64, 66 | } 67 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/basic_processor_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | postgres::utils::database::ArcDbPool, 3 | traits::{AsyncRunType, AsyncStep, NamedStep, Processable}, 4 | types::transaction_context::TransactionContext, 5 | utils::errors::ProcessorError, 6 | }; 7 | use 
anyhow::Result; 8 | use aptos_protos::transaction::v1::Transaction; 9 | use async_trait::async_trait; 10 | 11 | // Basic process step that runs a process function on each transaction 12 | pub struct BasicProcessorStep 13 | where 14 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 15 | Fut: std::future::Future> + Send + 'static, 16 | { 17 | pub process_function: F, 18 | pub conn_pool: ArcDbPool, 19 | } 20 | 21 | #[async_trait] 22 | impl Processable for BasicProcessorStep 23 | where 24 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 25 | Fut: std::future::Future> + Send + 'static, 26 | { 27 | type Input = Vec; 28 | type Output = (); 29 | type RunType = AsyncRunType; 30 | 31 | async fn process( 32 | &mut self, 33 | transactions: TransactionContext>, 34 | ) -> Result>, ProcessorError> { 35 | (self.process_function)(transactions.data, self.conn_pool.clone()) 36 | .await 37 | .map_err(|e| ProcessorError::ProcessError { 38 | message: format!("Processing transactions failed: {e:?}"), 39 | })?; 40 | Ok(Some(TransactionContext { 41 | data: (), // Stub out data since it's not used in the next step 42 | metadata: transactions.metadata, 43 | })) 44 | } 45 | } 46 | 47 | impl AsyncStep for BasicProcessorStep 48 | where 49 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 50 | Fut: std::future::Future> + Send + 'static, 51 | { 52 | } 53 | 54 | impl NamedStep for BasicProcessorStep 55 | where 56 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 57 | Fut: std::future::Future> + Send + 'static, 58 | { 59 | fn name(&self) -> String { 60 | "BasicProcessorStep".to_string() 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: "Create Release Tag" 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | release_type: 6 | description: 'Type of release (patch/minor)' 7 | required: true 8 | type: choice 9 | options: 10 | - 'release patch' 11 | - 'release minor' 12 | 13 | jobs: 14 | create-tag: 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: write 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 # Fetch all history for all tags and branches 22 | 23 | - name: Determine Next Version 24 | id: next-version 25 | run: | 26 | # Get the latest tag that matches our pattern 27 | latest_tag=$(git tag -l "aptos-indexer-sdk-v*" | sort -V | tail -n 1) 28 | 29 | if [ -z "$latest_tag" ]; then 30 | # If no existing tag, start with 1.0.0 31 | echo "next_tag=aptos-indexer-sdk-v1.0.0" >> $GITHUB_OUTPUT 32 | exit 0 33 | fi 34 | 35 | # Extract version numbers 36 | version=$(echo $latest_tag | sed 's/aptos-indexer-sdk-v//') 37 | major=$(echo $version | cut -d. -f1) 38 | minor=$(echo $version | cut -d. -f2) 39 | patch=$(echo $version | cut -d. 
-f3) 40 | 41 | if [ "${{ github.event.inputs.release_type }}" = "release patch" ]; then 42 | # Increment patch version 43 | new_version="${major}.${minor}.$((patch + 1))" 44 | else 45 | # Increment minor version, reset patch to 0 46 | new_version="${major}.$((minor + 1)).0" 47 | fi 48 | 49 | echo "next_tag=aptos-indexer-sdk-v${new_version}" >> $GITHUB_OUTPUT 50 | echo "Current version: ${latest_tag}" 51 | echo "Next version will be: aptos-indexer-sdk-v${new_version}" 52 | 53 | - name: Create and Push Tag 54 | run: | 55 | git tag ${{ steps.next-version.outputs.next_tag }} 56 | git push origin ${{ steps.next-version.outputs.next_tag }} 57 | 58 | - name: Create Release 59 | uses: softprops/action-gh-release@v1 60 | with: 61 | tag_name: ${{ steps.next-version.outputs.next_tag }} 62 | name: ${{ steps.next-version.outputs.next_tag }} 63 | generate_release_notes: true -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/timed_buffer_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use anyhow::Result; 9 | use async_trait::async_trait; 10 | use std::time::Duration; 11 | 12 | pub struct TimedBufferStep 13 | where 14 | Self: Sized + Send + 'static, 15 | Input: Send + 'static, 16 | { 17 | pub internal_buffer: Vec>, 18 | pub poll_interval: Duration, 19 | } 20 | 21 | impl TimedBufferStep 22 | where 23 | Self: Sized + Send + 'static, 24 | Input: Send + 'static, 25 | { 26 | #[allow(dead_code)] 27 | pub fn new(poll_interval: Duration) -> Self { 28 | Self { 29 | internal_buffer: Vec::new(), 30 | poll_interval, 31 | } 32 | } 33 | } 34 | 35 | #[async_trait] 36 | impl Processable for TimedBufferStep 37 | where 38 | Input: Send + Sync + 'static, 39 | { 40 | type Input = Input; 41 | type Output = Input; 42 | type RunType = PollableAsyncRunType; 43 | 44 | async fn process( 45 | &mut self, 46 | item: TransactionContext, 47 | ) -> Result>, ProcessorError> { 48 | self.internal_buffer.push(item); 49 | Ok(None) // No immediate output 50 | } 51 | 52 | // Once polling ends, release the remaining items in buffer 53 | async fn cleanup( 54 | &mut self, 55 | ) -> Result>>, ProcessorError> { 56 | Ok(Some(std::mem::take(&mut self.internal_buffer))) 57 | } 58 | } 59 | 60 | #[async_trait] 61 | impl PollableAsyncStep for TimedBufferStep { 62 | fn poll_interval(&self) -> Duration { 63 | self.poll_interval 64 | } 65 | 66 | async fn poll(&mut self) -> Result>>, ProcessorError> { 67 | Ok(Some(std::mem::take(&mut self.internal_buffer))) 68 | } 69 | } 70 | 71 | impl NamedStep for TimedBufferStep { 72 | // TODO: oncecell this somehow? Likely in wrapper struct... 
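To make the buffer-then-drain contract above concrete, here is a minimal, self-contained test sketch (an editor's illustration, not part of the SDK's test suite), assuming a plain `u64` payload:

```rust
#[cfg(test)]
mod timed_buffer_sketch {
    use super::*;
    use crate::types::transaction_context::TransactionMetadata;

    #[tokio::test]
    async fn buffers_until_polled() {
        let mut step = TimedBufferStep::<u64>::new(Duration::from_millis(100));
        let item = TransactionContext {
            data: 7u64,
            metadata: TransactionMetadata::default(),
        };
        // `process` only stores the item; nothing is emitted downstream yet.
        assert!(step.process(item).await.unwrap().is_none());
        // The next `poll` releases everything accumulated in the internal buffer.
        let drained = step.poll().await.unwrap().unwrap();
        assert_eq!(drained.len(), 1);
    }
}
```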
73 | fn name(&self) -> String { 74 | format!("TimedBuffer: {}", std::any::type_name::()) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/moving-average/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![forbid(unsafe_code)] 5 | 6 | use std::collections::VecDeque; 7 | 8 | // TPS data 9 | pub struct MovingAverage { 10 | window_millis: u64, 11 | // (timestamp_millis, value) 12 | values: VecDeque<(u64, u64)>, 13 | sum: u64, 14 | } 15 | 16 | impl MovingAverage { 17 | pub fn new(window_millis: u64) -> Self { 18 | let now = chrono::Utc::now().naive_utc().and_utc().timestamp_millis() as u64; 19 | let mut queue = VecDeque::new(); 20 | queue.push_back((now, 0)); 21 | Self { 22 | window_millis, 23 | values: queue, 24 | sum: 0, 25 | } 26 | } 27 | 28 | pub fn tick_now(&mut self, value: u64) { 29 | let now = chrono::Utc::now().naive_utc().and_utc().timestamp_millis() as u64; 30 | self.tick(now, value); 31 | } 32 | 33 | pub fn tick(&mut self, timestamp_millis: u64, value: u64) -> f64 { 34 | self.values.push_back((timestamp_millis, value)); 35 | self.sum += value; 36 | while self.values.len() > 2 { 37 | match self.values.front() { 38 | None => break, 39 | Some((ts, val)) => { 40 | if timestamp_millis - ts > self.window_millis { 41 | self.sum -= val; 42 | self.values.pop_front(); 43 | } else { 44 | break; 45 | } 46 | }, 47 | } 48 | } 49 | self.avg() 50 | } 51 | 52 | // Only be called after tick_now/tick is called. 53 | pub fn avg(&self) -> f64 { 54 | if self.values.len() < 2 { 55 | 0.0 56 | } else { 57 | let elapsed = self.values.back().unwrap().0 - self.values.front().unwrap().0; 58 | (self.sum * 1000) as f64 / elapsed as f64 59 | } 60 | } 61 | 62 | pub fn sum(&self) -> u64 { 63 | self.sum 64 | } 65 | } 66 | 67 | #[cfg(test)] 68 | mod test { 69 | use super::*; 70 | 71 | #[test] 72 | fn test_moving_average() { 73 | // 10 Second window. 74 | let mut ma = MovingAverage::new(10_000); 75 | // 9 seconds spent at 100 TPS. 76 | for _ in 0..9 { 77 | ma.tick_now(100); 78 | std::thread::sleep(std::time::Duration::from_secs(1)); 79 | } 80 | // No matter what algorithm we use, the average should be 99 at least. 
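The window arithmetic can also be exercised deterministically by driving `tick` with explicit millisecond timestamps instead of sleeping; the following is an editor's sketch (not part of the crate's tests), and the expected value follows from `sum * 1000 / elapsed_ms`:

```rust
#[test]
fn test_moving_average_deterministic() {
    let mut ma = MovingAverage::new(10_000);
    let t0 = chrono::Utc::now().naive_utc().and_utc().timestamp_millis() as u64;
    // 200 events over ~2 seconds => 200 * 1000 / 2000 = ~100 TPS.
    ma.tick(t0 + 1_000, 100);
    ma.tick(t0 + 2_000, 100);
    let avg = ma.avg();
    assert!(avg > 90.0 && avg < 110.0, "unexpected average: {avg}");
}
```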
81 | let avg = ma.avg(); 82 | assert!(avg >= 99.0, "Average is too low: {avg}"); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/events_model.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::schema::events; 7 | use aptos_indexer_processor_sdk::{ 8 | aptos_protos::transaction::v1::Event as EventPB, 9 | utils::convert::{standardize_address, truncate_str}, 10 | }; 11 | use diesel::{Identifiable, Insertable}; 12 | use field_count::FieldCount; 13 | use serde::{Deserialize, Serialize}; 14 | 15 | // p99 currently is 303 so using 300 as a safe max length 16 | const EVENT_TYPE_MAX_LENGTH: usize = 300; 17 | 18 | #[derive(Clone, Debug, Deserialize, FieldCount, Identifiable, Insertable, Serialize)] 19 | #[diesel(primary_key(transaction_version, event_index))] 20 | #[diesel(table_name = events)] 21 | pub struct Event { 22 | pub sequence_number: i64, 23 | pub creation_number: i64, 24 | pub account_address: String, 25 | pub transaction_version: i64, 26 | pub transaction_block_height: i64, 27 | pub type_: String, 28 | pub data: serde_json::Value, 29 | pub event_index: i64, 30 | pub indexed_type: String, 31 | } 32 | 33 | impl Event { 34 | pub fn from_event( 35 | event: &EventPB, 36 | transaction_version: i64, 37 | transaction_block_height: i64, 38 | event_index: i64, 39 | ) -> Self { 40 | let t: &str = event.type_str.as_ref(); 41 | Event { 42 | account_address: standardize_address( 43 | event.key.as_ref().unwrap().account_address.as_str(), 44 | ), 45 | creation_number: event.key.as_ref().unwrap().creation_number as i64, 46 | sequence_number: event.sequence_number as i64, 47 | transaction_version, 48 | transaction_block_height, 49 | type_: t.to_string(), 50 | data: serde_json::from_str(event.data.as_str()).unwrap(), 51 | event_index, 52 | indexed_type: truncate_str(t, EVENT_TYPE_MAX_LENGTH), 53 | } 54 | } 55 | 56 | pub fn from_events( 57 | events: &[EventPB], 58 | transaction_version: i64, 59 | transaction_block_height: i64, 60 | ) -> Vec { 61 | events 62 | .iter() 63 | .enumerate() 64 | .map(|(index, event)| { 65 | Self::from_event( 66 | event, 67 | transaction_version, 68 | transaction_block_height, 69 | index as i64, 70 | ) 71 | }) 72 | .collect::>() 73 | } 74 | } 75 | 76 | // Prevent conflicts with other things named `Event` 77 | pub type EventModel = Event; 78 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/additional_headers.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use serde::{Deserialize, Serialize}; 3 | use std::{collections::HashMap, str::FromStr}; 4 | use tonic::metadata::{Ascii, MetadataKey, MetadataMap, MetadataValue}; 5 | 6 | #[allow(clippy::too_long_first_doc_paragraph)] 7 | /// This struct holds additional headers that we attach to the request metadata. 8 | /// Regarding serde, we just serialize this as we would a HashMap. 9 | /// Similarly, we expect that format when deserializing. 10 | /// 11 | /// It is necessary to use HashMap because there is no extend method on MetadataMap 12 | /// itself, nor does it implement Serialize / Deserialize. 
It is better to parse once 13 | /// here right at config validation time anyway, it exposes any error as early as 14 | /// possible and saves us doing parsing (perhaps multiple times) later. 15 | #[derive(Clone, Debug, Default, Serialize, Deserialize)] 16 | #[serde(try_from = "HashMap")] 17 | #[serde(into = "HashMap")] 18 | pub struct AdditionalHeaders(HashMap, MetadataValue>); 19 | 20 | impl AdditionalHeaders { 21 | pub fn drain_into_metadata_map(self, metadata_map: &mut MetadataMap) { 22 | for (key, value) in self.0 { 23 | metadata_map.insert(key, value); 24 | } 25 | } 26 | } 27 | 28 | impl TryFrom> for AdditionalHeaders { 29 | type Error = anyhow::Error; 30 | 31 | /// Build `AdditionalHeaders` from just a map of strings. This can fail if the 32 | /// strings contain invalid characters for metadata keys / values, the chars must 33 | /// only be visible ascii characters. 34 | fn try_from(map: HashMap) -> Result { 35 | let mut out = HashMap::new(); 36 | for (k, v) in map { 37 | let k = MetadataKey::from_str(&k) 38 | .with_context(|| format!("Failed to parse key as ascii metadata key: {k}"))?; 39 | let v = MetadataValue::from_str(&v) 40 | .with_context(|| format!("Failed to parse value as ascii metadata value: {v}"))?; 41 | out.insert(k, v); 42 | } 43 | Ok(AdditionalHeaders(out)) 44 | } 45 | } 46 | 47 | impl From for HashMap { 48 | fn from(headers: AdditionalHeaders) -> Self { 49 | headers 50 | .0 51 | .into_iter() 52 | // It is safe to unwrap here because when building this we asserted that the 53 | // MetadataValue only contained visible ascii characters. 54 | .map(|(k, v)| (k.as_str().to_owned(), v.to_str().unwrap().to_owned())) 55 | .collect() 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/update-processor-sdk-version-legacy.yaml: -------------------------------------------------------------------------------- 1 | name: Update Processor SDK Version 2 | 'on': 3 | pull_request: 4 | types: [labeled, opened, synchronize, reopened, auto_merge_enabled] 5 | branches: 6 | - main 7 | paths: 8 | # Be conservative and only run this workflow when the Cargo.toml file changes. 9 | # Reason: if SDK version is not updated, no change will be picked up by processors. 
10 | - aptos-indexer-processors-sdk/Cargo.toml 11 | 12 | permissions: 13 | contents: read 14 | id-token: write 15 | 16 | jobs: 17 | update-processor-sdk-version: 18 | runs-on: ubuntu-latest 19 | # Only run on each PR once an appropriate event occurs 20 | if: | 21 | ( 22 | github.event_name == 'push' || 23 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 24 | ) 25 | steps: 26 | - id: auth 27 | uses: "google-github-actions/auth@v2" 28 | with: 29 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 30 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 31 | - name: Get Secret Manager Secrets 32 | id: secrets 33 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 34 | with: 35 | secrets: |- 36 | token:aptos-ci/github-actions-repository-dispatch 37 | - name: Checkout code 38 | uses: actions/checkout@v4 39 | with: 40 | token: ${{ steps.secrets.outputs.token }} 41 | - name: Capture the commit hash 42 | id: commit_hash 43 | run: | 44 | # Echo the commit hash to the output 45 | echo "::set-output name=commit_hash::$(echo $GITHUB_SHA)" 46 | # Echo the PR branch name to the output 47 | echo "::set-output name=branch_name::${{ github.head_ref }}" 48 | - name: Setup Rust 49 | uses: actions-rust-lang/setup-rust-toolchain@v1 50 | - name: Install toml 51 | run: cargo install toml-cli 52 | - name: Capture aptos-protos commit hash 53 | id: aptos_protos_commit_hash 54 | run: | 55 | cd aptos-indexer-processors-sdk 56 | aptos_protos_commit_hash=$(toml get Cargo.toml workspace.dependencies.aptos-protos.rev) 57 | echo "::set-output name=aptos_protos_commit_hash::${aptos_protos_commit_hash}" 58 | - name: Dispatch Event to processors Repo 59 | uses: peter-evans/repository-dispatch@v3.0.0 60 | with: 61 | token: ${{ steps.secrets.outputs.token }} 62 | repository: 'aptos-labs/aptos-indexer-processors' 63 | event-type: 'sdk-dependency-update' 64 | client-payload: '{"commit_hash": "${{ github.sha }}", "branch_name": "${{ steps.commit_hash.outputs.branch_name }}", "aptos_protos_commit_hash": ${{ steps.aptos_protos_commit_hash.outputs.aptos_protos_commit_hash }}}' 65 | -------------------------------------------------------------------------------- /.github/workflows/update-processor-sdk-version.yaml: -------------------------------------------------------------------------------- 1 | name: Update Processor SDK Version 2 | 'on': 3 | pull_request: 4 | types: [labeled, opened, synchronize, reopened, auto_merge_enabled] 5 | branches: 6 | - main 7 | paths: 8 | # Be conservative and only run this workflow when the Cargo.toml file changes. 9 | # Reason: if SDK version is not updated, no change will be picked up by processors. 
10 | - aptos-indexer-processors-sdk/Cargo.toml 11 | 12 | permissions: 13 | contents: read 14 | id-token: write 15 | 16 | jobs: 17 | update-processor-sdk-version: 18 | runs-on: ubuntu-latest 19 | # Only run on each PR once an appropriate event occurs 20 | if: | 21 | ( 22 | github.event_name == 'push' || 23 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 24 | ) 25 | steps: 26 | - id: auth 27 | uses: "google-github-actions/auth@v2" 28 | with: 29 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 30 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 31 | - name: Get Secret Manager Secrets 32 | id: secrets 33 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 34 | with: 35 | secrets: |- 36 | token:aptos-ci/github-actions-repository-dispatch 37 | - name: Checkout code 38 | uses: actions/checkout@v4 39 | with: 40 | token: ${{ steps.secrets.outputs.token }} 41 | - name: Capture the commit hash 42 | id: commit_hash 43 | run: | 44 | # Echo the commit hash to the output 45 | echo "::set-output name=commit_hash::$(echo $GITHUB_SHA)" 46 | # Echo the PR branch name to the output 47 | echo "::set-output name=branch_name::${{ github.head_ref }}" 48 | - name: Setup Rust 49 | uses: actions-rust-lang/setup-rust-toolchain@v1 50 | - name: Install toml 51 | run: cargo install toml-cli 52 | - name: Capture aptos-protos commit hash 53 | id: aptos_protos_commit_hash 54 | run: | 55 | cd aptos-indexer-processors-sdk 56 | aptos_protos_commit_hash=$(toml get Cargo.toml workspace.dependencies.aptos-protos.rev) 57 | echo "::set-output name=aptos_protos_commit_hash::${aptos_protos_commit_hash}" 58 | - name: Dispatch Event to processors Repo 59 | uses: peter-evans/repository-dispatch@v3.0.0 60 | with: 61 | token: ${{ steps.secrets.outputs.token }} 62 | repository: 'aptos-labs/aptos-indexer-processors-v2' 63 | event-type: 'sdk-dependency-update' 64 | client-payload: '{"commit_hash": "${{ github.sha }}", "branch_name": "${{ steps.commit_hash.outputs.branch_name }}", "aptos_protos_commit_hash": ${{ steps.aptos_protos_commit_hash.outputs.aptos_protos_commit_hash }}}' 65 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/chain_id_check.rs: -------------------------------------------------------------------------------- 1 | use super::errors::ProcessorError; 2 | use anyhow::Result; 3 | use aptos_indexer_transaction_stream::{TransactionStream, TransactionStreamConfig}; 4 | use async_trait::async_trait; 5 | use tracing::info; 6 | 7 | #[async_trait] 8 | pub trait ChainIdChecker { 9 | /// Save the chain ID to storage. This is used to track the chain ID that's being processed 10 | /// and prevents the processor from processing the wrong chain. 11 | async fn save_chain_id(&self, chain_id: u64) -> Result<()>; 12 | 13 | /// Get the chain ID from storage. This is used to track the chain ID that's being processed 14 | /// and prevents the processor from processing the wrong chain. 15 | async fn get_chain_id(&self) -> Result>; 16 | } 17 | 18 | /// Verify the chain id from TransactionStream against the database. 
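Any storage backend can participate in this check by implementing the trait above; `check_or_update_chain_id` below only relies on the two methods. As a minimal sketch (an editor's illustration, not part of the SDK, with the `Result` types reconstructed from the surrounding code), an in-memory checker might look like this:

```rust
use std::sync::Mutex;

/// Illustrative only: keeps the chain id in memory instead of a database.
struct InMemoryChainIdStore {
    chain_id: Mutex<Option<u64>>,
}

#[async_trait]
impl ChainIdChecker for InMemoryChainIdStore {
    async fn save_chain_id(&self, chain_id: u64) -> Result<()> {
        *self.chain_id.lock().unwrap() = Some(chain_id);
        Ok(())
    }

    async fn get_chain_id(&self) -> Result<Option<u64>> {
        Ok(*self.chain_id.lock().unwrap())
    }
}
```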
19 | pub async fn check_or_update_chain_id( 20 | transaction_stream_config: &TransactionStreamConfig, 21 | chain_id_checker: &T, 22 | ) -> Result 23 | where 24 | T: ChainIdChecker, 25 | { 26 | info!("Checking if chain id is correct"); 27 | let maybe_existing_chain_id = 28 | chain_id_checker 29 | .get_chain_id() 30 | .await 31 | .map_err(|e| ProcessorError::ChainIdCheckError { 32 | message: format!("Error getting chain id from db: {e:?}"), 33 | })?; 34 | 35 | let transaction_stream = TransactionStream::new(transaction_stream_config.clone()) 36 | .await 37 | .map_err(|e| ProcessorError::ChainIdCheckError { 38 | message: format!("Error initializing transaction stream: {e:?}"), 39 | })?; 40 | let grpc_chain_id = 41 | transaction_stream 42 | .get_chain_id() 43 | .await 44 | .map_err(|e| ProcessorError::ChainIdCheckError { 45 | message: format!("Error getting chain id from transaction stream: {e:?}"), 46 | })?; 47 | 48 | match maybe_existing_chain_id { 49 | Some(chain_id) => { 50 | if chain_id != grpc_chain_id { 51 | return Err(ProcessorError::ChainIdCheckError { 52 | message: format!( 53 | "Wrong chain id detected! Trying to index chain {grpc_chain_id} now but existing data is for chain {chain_id}", 54 | ), 55 | }); 56 | } 57 | 58 | info!( 59 | chain_id = chain_id, 60 | "Chain id matches! Continue to index...", 61 | ); 62 | Ok(chain_id) 63 | }, 64 | None => { 65 | info!( 66 | chain_id = grpc_chain_id, 67 | "Saving chain id to db, continue to index..." 68 | ); 69 | chain_id_checker 70 | .save_chain_id(grpc_chain_id) 71 | .await 72 | .map_err(|e| ProcessorError::ChainIdCheckError { 73 | message: format!("Error saving chain id to db: {e:?}"), 74 | })?; 75 | Ok(grpc_chain_id) 76 | }, 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/config.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::additional_headers::AdditionalHeaders; 2 | use aptos_transaction_filter::BooleanTransactionFilter; 3 | use serde::{Deserialize, Serialize}; 4 | use std::time::Duration; 5 | use url::Url; 6 | 7 | #[derive(Clone, Debug, Deserialize, Serialize)] 8 | #[serde(deny_unknown_fields)] 9 | pub struct TransactionStreamConfig { 10 | pub indexer_grpc_data_service_address: Url, 11 | pub starting_version: Option, 12 | pub request_ending_version: Option, 13 | pub auth_token: String, 14 | pub request_name_header: String, 15 | #[serde(default)] 16 | pub additional_headers: AdditionalHeaders, 17 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_http2_ping_interval")] 18 | pub indexer_grpc_http2_ping_interval_secs: u64, 19 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_http2_ping_timeout")] 20 | pub indexer_grpc_http2_ping_timeout_secs: u64, 21 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_reconnection_timeout")] 22 | pub indexer_grpc_reconnection_timeout_secs: u64, 23 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_response_item_timeout")] 24 | pub indexer_grpc_response_item_timeout_secs: u64, 25 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_reconnection_max_retries")] 26 | pub indexer_grpc_reconnection_max_retries: u64, 27 | #[serde(default)] 28 | pub transaction_filter: Option, 29 | } 30 | 31 | impl TransactionStreamConfig { 32 | pub const fn indexer_grpc_http2_ping_interval(&self) -> Duration { 33 | Duration::from_secs(self.indexer_grpc_http2_ping_interval_secs) 34 | } 35 | 36 
| pub const fn indexer_grpc_http2_ping_timeout(&self) -> Duration { 37 | Duration::from_secs(self.indexer_grpc_http2_ping_timeout_secs) 38 | } 39 | 40 | pub const fn indexer_grpc_reconnection_timeout(&self) -> Duration { 41 | Duration::from_secs(self.indexer_grpc_reconnection_timeout_secs) 42 | } 43 | 44 | pub const fn indexer_grpc_response_item_timeout(&self) -> Duration { 45 | Duration::from_secs(self.indexer_grpc_response_item_timeout_secs) 46 | } 47 | 48 | /// Indexer GRPC http2 ping interval in seconds. Defaults to 30. 49 | /// Tonic ref: https://docs.rs/tonic/latest/tonic/transport/channel/struct.Endpoint.html#method.http2_keep_alive_interval 50 | pub const fn default_indexer_grpc_http2_ping_interval() -> u64 { 51 | 30 52 | } 53 | 54 | /// Indexer GRPC http2 ping timeout in seconds. Defaults to 10. 55 | pub const fn default_indexer_grpc_http2_ping_timeout() -> u64 { 56 | 10 57 | } 58 | 59 | /// Default timeout for establishing a grpc connection. Defaults to 5 seconds. 60 | pub const fn default_indexer_grpc_reconnection_timeout() -> u64 { 61 | 5 62 | } 63 | 64 | /// Default timeout for receiving an item from grpc stream. Defaults to 60 seconds. 65 | pub const fn default_indexer_grpc_response_item_timeout() -> u64 { 66 | 60 67 | } 68 | 69 | /// Default max retries for reconnecting to grpc. Defaults to 5. 70 | pub const fn default_indexer_grpc_reconnection_max_retries() -> u64 { 71 | 5 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/database.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use async_trait::async_trait; 3 | use testcontainers::{ 4 | core::{IntoContainerPort, WaitFor}, 5 | runners::AsyncRunner, 6 | ContainerAsync, GenericImage, ImageExt, 7 | }; 8 | 9 | const POSTGRES_IMAGE: &str = "postgres"; 10 | const POSTGRES_VERSION: &str = "14"; 11 | const POSTGRES_PORT: u16 = 5432; 12 | const POSTGRES_DB: &str = "postgres"; 13 | const POSTGRES_USER: &str = "postgres"; 14 | const POSTGRES_PASSWORD: &str = "postgres"; 15 | 16 | #[async_trait] 17 | pub trait TestDatabase: Send + Sync { 18 | /// Set up the test container using user-defined code. 19 | async fn setup<'a>(&'a mut self) -> anyhow::Result<()>; 20 | 21 | /// Retrieve the database connection URL after setup. 22 | fn get_db_url(&self) -> String; 23 | } 24 | 25 | #[derive(Default)] 26 | pub struct PostgresTestDatabase { 27 | connection_string: String, 28 | postgres_container: Option>, 29 | } 30 | 31 | impl PostgresTestDatabase { 32 | pub fn new() -> Self { 33 | PostgresTestDatabase { 34 | postgres_container: None, 35 | connection_string: String::new(), 36 | } 37 | } 38 | 39 | /// Helper method to configure and start the Postgres container. 40 | async fn start_postgres_container(&mut self) -> Result> { 41 | let postgres_image = GenericImage::new(POSTGRES_IMAGE, POSTGRES_VERSION) 42 | .with_exposed_port(POSTGRES_PORT.tcp()) 43 | .with_wait_for(WaitFor::message_on_stderr( 44 | "database system is ready to accept connections", 45 | )) 46 | .with_env_var("POSTGRES_DB", POSTGRES_DB) 47 | .with_env_var("POSTGRES_USER", POSTGRES_USER) 48 | .with_env_var("POSTGRES_PASSWORD", POSTGRES_PASSWORD); 49 | 50 | let container = postgres_image 51 | .start() 52 | .await 53 | .context("Failed to start Postgres container")?; 54 | 55 | Ok(container) 56 | } 57 | 58 | /// Helper method to get the host and port information of the running container.
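From a test's point of view the whole lifecycle is construct, `setup`, then `get_db_url`. A hedged usage sketch (an editor's illustration, assuming the `testing_framework` feature is enabled and Docker is available on the machine running the test):

```rust
#[tokio::test]
async fn spins_up_a_postgres_container() -> anyhow::Result<()> {
    let mut db = PostgresTestDatabase::new();
    // Starts the container and waits for the readiness message configured above.
    db.setup().await?;
    let url = db.get_db_url();
    assert!(url.starts_with("postgres://"));
    Ok(())
}
```

The returned URL can then be handed to a connection pool or to Diesel migrations in the rest of the test.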
59 | async fn get_connection_info(&self) -> Result<(String, u16)> { 60 | let host = self 61 | .postgres_container 62 | .as_ref() 63 | .context("Postgres container not initialized")? 64 | .get_host() 65 | .await 66 | .context("Failed to get container host")?; 67 | 68 | let port = self 69 | .postgres_container 70 | .as_ref() 71 | .context("Postgres container not initialized")? 72 | .get_host_port_ipv4(5432) 73 | .await 74 | .context("Failed to get container port")?; 75 | 76 | Ok((host.to_string(), port)) 77 | } 78 | } 79 | 80 | #[async_trait] 81 | impl TestDatabase for PostgresTestDatabase { 82 | /// Set up the Postgres container and get the database connection URL. 83 | async fn setup(&mut self) -> Result<()> { 84 | self.postgres_container = Some(self.start_postgres_container().await?); 85 | 86 | let (host, port) = self.get_connection_info().await?; 87 | 88 | self.connection_string = format!("postgres://postgres:postgres@{host}:{port}/postgres"); 89 | Ok(()) 90 | } 91 | 92 | /// Retrieve the Postgres connection URL after the container has been set up. 93 | fn get_db_url(&self) -> String { 94 | self.connection_string.clone() 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/main.rs: -------------------------------------------------------------------------------- 1 | use crate::events_model::EventModel; 2 | use anyhow::Result; 3 | use aptos_indexer_processor_sdk::{ 4 | aptos_protos::transaction::v1::transaction::TxnData, 5 | postgres::{ 6 | basic_processor::process, 7 | utils::database::{execute_in_chunks, MAX_DIESEL_PARAM_SIZE}, 8 | }, 9 | }; 10 | use diesel::{pg::Pg, query_builder::QueryFragment}; 11 | use diesel_migrations::{embed_migrations, EmbeddedMigrations}; 12 | use field_count::FieldCount; 13 | use rayon::prelude::*; 14 | use tracing::{error, info, warn}; 15 | 16 | pub mod events_model; 17 | #[path = "db/schema.rs"] 18 | pub mod schema; 19 | 20 | const MIGRATIONS: EmbeddedMigrations = embed_migrations!("src/db/migrations"); 21 | 22 | fn insert_events_query( 23 | items_to_insert: Vec, 24 | ) -> impl QueryFragment + diesel::query_builder::QueryId + Send { 25 | use crate::schema::events::dsl::*; 26 | diesel::insert_into(crate::schema::events::table) 27 | .values(items_to_insert) 28 | .on_conflict((transaction_version, event_index)) 29 | .do_nothing() 30 | } 31 | 32 | #[tokio::main] 33 | async fn main() -> Result<()> { 34 | process( 35 | "events_processor".to_string(), 36 | MIGRATIONS, 37 | async |transactions, conn_pool| { 38 | let events = transactions 39 | .par_iter() 40 | .map(|txn| { 41 | let txn_version = txn.version as i64; 42 | let block_height = txn.block_height as i64; 43 | let txn_data = match txn.txn_data.as_ref() { 44 | Some(data) => data, 45 | None => { 46 | warn!( 47 | transaction_version = txn_version, 48 | "Transaction data doesn't exist" 49 | ); 50 | return vec![]; 51 | }, 52 | }; 53 | let default = vec![]; 54 | let raw_events = match txn_data { 55 | TxnData::BlockMetadata(tx_inner) => &tx_inner.events, 56 | TxnData::Genesis(tx_inner) => &tx_inner.events, 57 | TxnData::User(tx_inner) => &tx_inner.events, 58 | _ => &default, 59 | }; 60 | 61 | EventModel::from_events(raw_events, txn_version, block_height) 62 | }) 63 | .flatten() 64 | .collect::>(); 65 | 66 | // Store events in the database 67 | let execute_res = execute_in_chunks( 68 | conn_pool.clone(), 69 | insert_events_query, 70 | &events, 71 | MAX_DIESEL_PARAM_SIZE / EventModel::field_count(), 72 | ) 73 | .await; 74 | match 
execute_res { 75 | Ok(_) => { 76 | info!( 77 | "Events version [{}, {}] stored successfully", 78 | transactions.first().unwrap().version, 79 | transactions.last().unwrap().version 80 | ); 81 | Ok(()) 82 | }, 83 | Err(e) => { 84 | error!("Failed to store events: {:?}", e); 85 | Err(e) 86 | }, 87 | } 88 | }, 89 | ) 90 | .await?; 91 | Ok(()) 92 | } 93 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-indexer-processor-sdk" 3 | version = "0.2.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | ahash = { workspace = true } 16 | anyhow = { workspace = true } 17 | aptos-indexer-transaction-stream = { workspace = true } 18 | aptos-protos = { workspace = true } 19 | aptos-transaction-filter = { workspace = true } 20 | async-trait = { workspace = true } 21 | autometrics = { workspace = true, optional = true } 22 | axum = { workspace = true, optional = true } 23 | backtrace = { workspace = true, optional = true } 24 | bcs = { workspace = true } 25 | bigdecimal = { workspace = true } 26 | chrono = { workspace = true } 27 | clap = { workspace = true, optional = true } 28 | derive_builder = { workspace = true } 29 | diesel = { workspace = true, optional = true } 30 | diesel-async = { workspace = true, optional = true } 31 | diesel_migrations = { workspace = true, optional = true } 32 | field_count = { workspace = true, optional = true } 33 | futures = { workspace = true } 34 | futures-util = { workspace = true } 35 | hex = { workspace = true } 36 | indexmap = { workspace = true } 37 | instrumented-channel = { workspace = true } 38 | kanal = { workspace = true } 39 | mockall = { workspace = true } 40 | native-tls = { workspace = true, optional = true } 41 | num_cpus = { workspace = true } 42 | once_cell = { workspace = true } 43 | petgraph = { workspace = true } 44 | postgres-native-tls = { workspace = true, optional = true } 45 | prometheus = { workspace = true } 46 | prometheus-client = { workspace = true } 47 | serde = { workspace = true } 48 | serde_json = { workspace = true } 49 | serde_yaml = { workspace = true } 50 | sha2 = { workspace = true } 51 | tempfile = { workspace = true } 52 | testcontainers = { workspace = true, optional = true } 53 | thiserror = { workspace = true } 54 | tiny-keccak = { workspace = true } 55 | tokio = { workspace = true } 56 | tokio-postgres = { workspace = true, optional = true } 57 | tokio-retry = { workspace = true, optional = true } 58 | tokio-stream = { workspace = true, optional = true } 59 | toml = { workspace = true, optional = true } 60 | tonic = { workspace = true, optional = true } 61 | tracing = { workspace = true } 62 | tracing-subscriber = { workspace = true, optional = true } 63 | url = { workspace = true } 64 | 65 | [target.'cfg(target_os = "linux")'.dependencies] 66 | aptos-system-utils = { workspace = true } 67 | 68 | [features] 69 | # Server framework feature enables the HTTP server with metrics and health check endpoints. 70 | # This requires tokio net features for the TCP listener. 
71 | server_framework = [ 72 | "autometrics", 73 | "axum", 74 | "backtrace", 75 | "clap", 76 | "toml", 77 | "tracing-subscriber", 78 | "tokio/net", 79 | ] 80 | postgres_partial = [ 81 | "diesel", 82 | "diesel-async", 83 | "diesel_migrations", 84 | "field_count", 85 | "postgres-native-tls", 86 | "native-tls", 87 | "server_framework", 88 | "tokio-postgres", 89 | ] 90 | # When using the postgres_full features we enable the diesel/postgres feature. We configure 91 | # it in a feature so the CLI can opt out, since it cannot tolerate the libpq dep. 92 | # Recall that features should always be additive. 93 | postgres_full = ["postgres_partial", "diesel/postgres"] 94 | testing_framework = [ 95 | "testcontainers", 96 | "tonic", 97 | "tokio-retry", 98 | "tokio-stream", 99 | "tokio/net", 100 | ] 101 | default = [] 102 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/arcify_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{async_step::AsyncRunType, AsyncStep, NamedStep, Processable}, 3 | types::transaction_context::TransactionContext, 4 | utils::errors::ProcessorError, 5 | }; 6 | use std::{marker::PhantomData, sync::Arc}; 7 | 8 | pub struct ArcifyStep 9 | where 10 | Self: Sized + Send + 'static, 11 | { 12 | _marker: PhantomData, 13 | } 14 | 15 | impl ArcifyStep { 16 | pub fn new() -> Self { 17 | Self { 18 | _marker: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | impl Default for ArcifyStep { 24 | fn default() -> Self { 25 | Self::new() 26 | } 27 | } 28 | 29 | #[async_trait::async_trait] 30 | impl Processable for ArcifyStep 31 | where 32 | T: Send + Sync + 'static, 33 | { 34 | type Input = Vec; 35 | type Output = Vec>; 36 | type RunType = AsyncRunType; 37 | 38 | async fn process( 39 | &mut self, 40 | item: TransactionContext>, 41 | ) -> Result>>>, ProcessorError> { 42 | Ok(Some(TransactionContext { 43 | data: item.data.into_iter().map(Arc::new).collect(), 44 | metadata: item.metadata, 45 | })) 46 | } 47 | } 48 | 49 | impl AsyncStep for ArcifyStep where T: Send + Sync + 'static {} 50 | 51 | impl NamedStep for ArcifyStep 52 | where 53 | T: Send + Sync + 'static, 54 | { 55 | fn name(&self) -> String { 56 | format!("Arcify<{}>", std::any::type_name::()) 57 | } 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | use crate::types::transaction_context::TransactionMetadata; 64 | 65 | fn generate_transaction_context() -> TransactionContext> { 66 | TransactionContext { 67 | data: vec![1, 2, 3], 68 | metadata: TransactionMetadata { 69 | start_version: 0, 70 | end_version: 0, 71 | start_transaction_timestamp: None, 72 | end_transaction_timestamp: None, 73 | total_size_in_bytes: 0, 74 | }, 75 | } 76 | } 77 | 78 | #[tokio::test] 79 | #[allow(clippy::needless_return)] 80 | async fn test_arcify_step_process() { 81 | let mut step = ArcifyStep::::new(); 82 | let input = generate_transaction_context(); 83 | 84 | let result = step.process(input).await.unwrap().unwrap(); 85 | assert_eq!(result.data.len(), 3); 86 | assert_eq!(*result.data[0], 1); 87 | assert_eq!(*result.data[1], 2); 88 | assert_eq!(*result.data[2], 3); 89 | } 90 | 91 | #[tokio::test] 92 | #[allow(clippy::needless_return)] 93 | async fn test_arcify_strong_count() { 94 | let mut step = ArcifyStep::::new(); 95 | let input = generate_transaction_context(); 96 | 97 | let result = step.process(input).await.unwrap().unwrap(); 98 | assert_eq!(Arc::strong_count(&result.data[0]), 1); 99 | 100 | let 
arc_clone = result.data[0].clone(); 101 | assert_eq!(Arc::strong_count(&arc_clone), 2); 102 | 103 | drop(arc_clone); 104 | assert_eq!(Arc::strong_count(&result.data[0]), 1); 105 | } 106 | 107 | #[tokio::test] 108 | #[allow(clippy::needless_return)] 109 | async fn test_arcify_ptr_eq() { 110 | let mut step = ArcifyStep::::new(); 111 | let input = generate_transaction_context(); 112 | 113 | let result = step.process(input).await.unwrap().unwrap(); 114 | let arc_clone = result.data[0].clone(); 115 | assert!(Arc::ptr_eq(&result.data[0], &arc_clone)); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/health/progress.rs: -------------------------------------------------------------------------------- 1 | //! Progress health checking for processors. 2 | 3 | use super::core::HealthCheck; 4 | use async_trait::async_trait; 5 | use chrono::{NaiveDateTime, Utc}; 6 | use serde::{Deserialize, Serialize}; 7 | use tracing::warn; 8 | 9 | /// Configuration for progress health checking. 10 | #[derive(Clone, Debug, Deserialize, Serialize)] 11 | pub struct ProgressHealthConfig { 12 | /// The number of seconds the processor is allowed to make no progress before it's 13 | /// considered unhealthy. 14 | #[serde(default = "default_no_progress_threshold_secs")] 15 | pub no_progress_threshold_secs: u64, 16 | } 17 | 18 | pub const fn default_no_progress_threshold_secs() -> u64 { 19 | 45 20 | } 21 | 22 | impl Default for ProgressHealthConfig { 23 | fn default() -> Self { 24 | Self { 25 | no_progress_threshold_secs: default_no_progress_threshold_secs(), 26 | } 27 | } 28 | } 29 | 30 | /// A trait for providing the processor's last updated timestamp. 31 | /// 32 | /// Implement this trait to provide a custom backend for progress health checking. 33 | /// The SDK provides `PostgresProgressStatusProvider` for postgres-backed processors. 34 | #[async_trait] 35 | pub trait ProgressStatusProvider: Send + Sync { 36 | /// Get the last updated timestamp for the processor. 37 | /// Returns `None` if the processor hasn't written status yet (e.g., during startup). 38 | async fn get_last_updated(&self) -> Result, String>; 39 | } 40 | 41 | /// A health check that verifies the processor is making forward progress. 42 | /// 43 | /// This is generic over the status provider, allowing different backends (postgres, etc.). 
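As a hedged sketch of that genericity (an editor's illustration, not part of the SDK), a backend only has to report a timestamp; everything else is handled by the checker defined below:

```rust
/// Illustrative only: a provider backed by an in-memory timestamp.
struct InMemoryStatusProvider {
    last_updated: std::sync::Mutex<Option<chrono::NaiveDateTime>>,
}

#[async_trait]
impl ProgressStatusProvider for InMemoryStatusProvider {
    async fn get_last_updated(&self) -> Result<Option<chrono::NaiveDateTime>, String> {
        Ok(*self.last_updated.lock().map_err(|e| e.to_string())?)
    }
}
```

A `ProgressHealthChecker` can then be built from `Box::new(provider)` together with a `ProgressHealthConfig`.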
44 | pub struct ProgressHealthChecker { 45 | processor_name: String, 46 | status_provider: Box, 47 | no_progress_threshold_secs: u64, 48 | } 49 | 50 | impl ProgressHealthChecker { 51 | pub fn new( 52 | processor_name: String, 53 | status_provider: Box, 54 | config: ProgressHealthConfig, 55 | ) -> Self { 56 | Self { 57 | processor_name, 58 | status_provider, 59 | no_progress_threshold_secs: config.no_progress_threshold_secs, 60 | } 61 | } 62 | } 63 | 64 | #[async_trait] 65 | impl HealthCheck for ProgressHealthChecker { 66 | fn name(&self) -> &str { 67 | "ProgressHealth" 68 | } 69 | 70 | async fn is_healthy(&self) -> Result<(), String> { 71 | let last_updated = self.status_provider.get_last_updated().await?; 72 | 73 | match last_updated { 74 | Some(last_updated) => { 75 | let now = Utc::now().naive_utc(); 76 | let seconds_since_update = (now - last_updated).num_seconds(); 77 | let timeout = self.no_progress_threshold_secs as i64; 78 | 79 | if seconds_since_update > timeout { 80 | warn!( 81 | processor = %self.processor_name, 82 | seconds_since_update, 83 | timeout, 84 | "Processor has not made progress within timeout" 85 | ); 86 | Err(format!( 87 | "Last updated {} seconds ago (threshold: {} seconds)", 88 | seconds_since_update, timeout 89 | )) 90 | } else { 91 | Ok(()) 92 | } 93 | }, 94 | None => { 95 | // The processor hasn't written to the status table yet. 96 | // This is okay during startup, so we return healthy. 97 | Ok(()) 98 | }, 99 | } 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /.github/workflows/update-proto-dependency.yaml: -------------------------------------------------------------------------------- 1 | name: Update Proto Dependency 2 | 3 | on: 4 | repository_dispatch: 5 | types: [proto-dependency-update] 6 | workflow_dispatch: 7 | inputs: 8 | commit_hash: 9 | description: 'Commit hash to update proto to' 10 | required: true 11 | branch_name: 12 | description: 'Branch name (without -update-aptos-protos suffix)' 13 | required: true 14 | default: 'main' 15 | 16 | permissions: 17 | contents: write 18 | pull-requests: write 19 | id-token: write 20 | 21 | jobs: 22 | update-the-dependency: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - id: auth 26 | uses: "google-github-actions/auth@v2" 27 | with: 28 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 29 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 30 | - name: Get Secret Manager Secrets 31 | id: secrets 32 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 33 | with: 34 | secrets: |- 35 | token:aptos-ci/github-actions-repository-dispatch 36 | - name: Configure Git user 37 | run: | 38 | git config --global user.name "Aptos Bot" 39 | git config --global user.email "aptos-bot@aptoslabs.com" 40 | - name: Checkout 41 | uses: actions/checkout@v4 42 | with: 43 | token: ${{ steps.secrets.outputs.token }} 44 | - name: Setup Rust 45 | uses: actions-rust-lang/setup-rust-toolchain@v1 46 | 47 | - name: Install toml 48 | run: cargo install toml-cli 49 | 50 | - name: Update the dependency 51 | run: | 52 | set -e 53 | toml set Cargo.toml workspace.dependencies.aptos-protos.rev ${{ github.event.inputs.commit_hash || github.event.client_payload.commit_hash }} > Cargo.tmp && mv Cargo.tmp Cargo.toml 54 | working-directory: aptos-indexer-processors-sdk/ 55 | 56 | - name: Commit and Push Changes 57 | run: | 58 | set -e 59 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 60 | git 
checkout -b "$branch_name" 61 | git add Cargo.toml 62 | git commit -m "Update aptos-protos to ${{ github.event.inputs.commit_hash || github.event.client_payload.commit_hash }}" 63 | git push origin "$branch_name" --force 64 | env: 65 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 66 | working-directory: aptos-indexer-processors-sdk/ 67 | 68 | - name: Check if PR Already Exists 69 | id: check_pr 70 | run: | 71 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 72 | existing_pr=$(gh pr list --base main --head "$branch_name" --json number --jq '.[].number') 73 | if [ -n "$existing_pr" ]; then 74 | echo "::set-output name=if_pr_exists::true" 75 | else 76 | echo "::set-output name=if_pr_exists::false" 77 | fi 78 | env: 79 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 80 | - name: Create Pull Request 81 | if: steps.check_pr.outputs.if_pr_exists == 'false' 82 | run: | 83 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 84 | gh pr create --title "Update aptos-protos to upstream branch ${{ github.event.client_payload.branch_name }}" \ 85 | --body "This PR updates aptos-protos to new version." \ 86 | --base main \ 87 | --head "$branch_name" \ 88 | --label "indexer-sdk-update" 89 | env: 90 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 91 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/time.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Helpers related to dealing with dates and times. 5 | 6 | use aptos_protos::util::timestamp::Timestamp; 7 | use chrono::Utc; 8 | 9 | /// 9999-12-31 23:59:59, this is the max supported by Google BigQuery. 10 | pub const MAX_TIMESTAMP_SECS: i64 = 253_402_300_799; 11 | 12 | pub fn parse_timestamp(ts: &Timestamp, version: i64) -> chrono::DateTime { 13 | let final_ts = if ts.seconds >= MAX_TIMESTAMP_SECS || ts.seconds < 0 { 14 | Timestamp { 15 | seconds: MAX_TIMESTAMP_SECS, 16 | nanos: 0, 17 | } 18 | } else { 19 | *ts 20 | }; 21 | chrono::DateTime::from_timestamp(final_ts.seconds, final_ts.nanos as u32) 22 | .unwrap_or_else(|| panic!("Could not parse timestamp {ts:?} for version {version}")) 23 | } 24 | 25 | pub fn parse_timestamp_secs(ts: u64, version: i64) -> chrono::DateTime { 26 | chrono::DateTime::from_timestamp(std::cmp::min(ts, MAX_TIMESTAMP_SECS as u64) as i64, 0) 27 | .unwrap_or_else(|| panic!("Could not parse timestamp {ts:?} for version {version}")) 28 | } 29 | 30 | pub fn compute_nanos_since_epoch(datetime: chrono::DateTime) -> u64 { 31 | // The Unix epoch is 1970-01-01T00:00:00Z 32 | let unix_epoch = chrono::DateTime::::from_timestamp(0, 0).unwrap(); 33 | let duration_since_epoch = datetime.signed_duration_since(unix_epoch); 34 | 35 | // Convert the duration to nanoseconds and return 36 | duration_since_epoch.num_seconds() as u64 * 1_000_000_000 37 | + duration_since_epoch.subsec_nanos() as u64 38 | } 39 | 40 | /// Convert the protobuf Timestamp to epcoh time in seconds. 
41 | pub fn time_diff_since_pb_timestamp_in_secs(timestamp: &Timestamp) -> f64 { 42 | let current_timestamp = std::time::SystemTime::now() 43 | .duration_since(std::time::UNIX_EPOCH) 44 | .expect("SystemTime before UNIX EPOCH!") 45 | .as_secs_f64(); 46 | let transaction_time = timestamp.seconds as f64 + timestamp.nanos as f64 * 1e-9; 47 | current_timestamp - transaction_time 48 | } 49 | 50 | /// Convert the protobuf timestamp to ISO format 51 | pub fn timestamp_to_iso(timestamp: &Timestamp) -> String { 52 | let dt = parse_timestamp(timestamp, 0); 53 | dt.format("%Y-%m-%dT%H:%M:%S%.9fZ").to_string() 54 | } 55 | 56 | /// Convert the protobuf timestamp to unixtime 57 | pub fn timestamp_to_unixtime(timestamp: &Timestamp) -> f64 { 58 | timestamp.seconds as f64 + timestamp.nanos as f64 * 1e-9 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | use chrono::Datelike; 65 | 66 | #[test] 67 | fn test_parse_timestamp() { 68 | let ts = parse_timestamp( 69 | &Timestamp { 70 | seconds: 1649560602, 71 | nanos: 0, 72 | }, 73 | 1, 74 | ) 75 | .naive_utc(); 76 | assert_eq!(ts.and_utc().timestamp(), 1649560602); 77 | assert_eq!(ts.year(), 2022); 78 | 79 | let too_high_ts = parse_timestamp( 80 | &Timestamp { 81 | seconds: u64::MAX as i64, // Convert a really big number to i64 82 | nanos: 0, 83 | }, 84 | 1, 85 | ); 86 | let max_ts = parse_timestamp( 87 | &Timestamp { 88 | seconds: MAX_TIMESTAMP_SECS, 89 | nanos: 0, 90 | }, 91 | 1, 92 | ); 93 | assert_eq!(too_high_ts, max_ts); 94 | 95 | let ts2 = parse_timestamp_secs(600000000000000, 2); 96 | assert_eq!(ts2.year(), 9999); 97 | 98 | let ts3 = parse_timestamp_secs(1659386386, 2); 99 | assert_eq!(ts3.timestamp(), 1659386386); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/property_map.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | use crate::utils::convert::{convert_bcs_hex, convert_bcs_hex_new}; 5 | use ahash::AHashMap; 6 | use indexmap::IndexMap; 7 | use serde::{Deserialize, Serialize}; 8 | use serde_json::{Result, Value}; 9 | #[derive(Serialize, Deserialize, Debug, Clone)] 10 | pub struct PropertyValue { 11 | value: String, 12 | typ: String, 13 | } 14 | 15 | pub fn create_property_value(typ: String, value: String) -> Result { 16 | Ok(PropertyValue { 17 | value: convert_bcs_hex(typ.clone(), value.clone()).unwrap_or(value), 18 | typ, 19 | }) 20 | } 21 | 22 | #[derive(Serialize, Deserialize, Debug, Clone)] 23 | pub struct PropertyMap { 24 | data: IndexMap, 25 | } 26 | 27 | impl PropertyMap { 28 | /// Deserializes PropertyValue from bcs encoded json 29 | pub fn from_bcs_encode_str(val: Value) -> Option { 30 | let mut pm = PropertyMap { 31 | data: IndexMap::new(), 32 | }; 33 | let records: &Vec = val.get("map")?.get("data")?.as_array()?; 34 | for entry in records { 35 | let key = entry.get("key")?.as_str()?; 36 | let val = entry.get("value")?.get("value")?.as_str()?; 37 | let typ = entry.get("value")?.get("type")?.as_str()?; 38 | let pv = create_property_value(typ.to_string(), val.to_string()).ok()?; 39 | pm.data.insert(key.to_string(), pv); 40 | } 41 | Some(Self::to_flat_json(pm)) 42 | } 43 | 44 | /// Flattens PropertyMap which can't be easily consumable by downstream. 
45 | /// For example: Object {"data": Object {"creation_time_sec": Object {"value": String("1666125588")}}} 46 | /// becomes Object {"creation_time_sec": "1666125588"} 47 | fn to_flat_json(val: PropertyMap) -> Value { 48 | let mut map = AHashMap::new(); 49 | for (k, v) in val.data { 50 | map.insert(k, v.value); 51 | } 52 | serde_json::to_value(map).unwrap() 53 | } 54 | } 55 | 56 | #[derive(Serialize, Deserialize, Debug, Clone)] 57 | pub struct TokenObjectPropertyValue { 58 | value: String, 59 | typ: u8, 60 | } 61 | 62 | pub fn create_token_object_property_value( 63 | typ: u8, 64 | value: String, 65 | ) -> Result { 66 | Ok(TokenObjectPropertyValue { 67 | value: convert_bcs_hex_new(typ, value.clone()).unwrap_or(value), 68 | typ, 69 | }) 70 | } 71 | 72 | #[derive(Serialize, Deserialize, Debug, Clone)] 73 | pub struct TokenObjectPropertyMap { 74 | data: IndexMap, 75 | } 76 | 77 | impl TokenObjectPropertyMap { 78 | /// Deserializes PropertyValue from bcs encoded json 79 | pub fn from_bcs_encode_str(val: Value) -> Option { 80 | let mut pm = TokenObjectPropertyMap { 81 | data: IndexMap::new(), 82 | }; 83 | let records: &Vec = val.get("data")?.as_array()?; 84 | for entry in records { 85 | let key = entry.get("key")?.as_str()?; 86 | let val = entry.get("value")?.get("value")?.as_str()?; 87 | let typ = entry.get("value")?.get("type")?.as_u64()?; 88 | let pv = create_token_object_property_value(typ as u8, val.to_string()).ok()?; 89 | pm.data.insert(key.to_string(), pv); 90 | } 91 | Some(Self::to_flat_json_new(pm)) 92 | } 93 | 94 | /// Flattens PropertyMap which can't be easily consumable by downstream. 95 | /// For example: Object {"data": Object {"creation_time_sec": Object {"value": String("1666125588")}}} 96 | /// becomes Object {"creation_time_sec": "1666125588"} 97 | fn to_flat_json_new(val: TokenObjectPropertyMap) -> Value { 98 | let mut map = IndexMap::new(); 99 | for (k, v) in val.data { 100 | map.insert(k, v.value); 101 | } 102 | serde_json::to_value(map).unwrap() 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | 4 | members = [ 5 | "instrumented-channel", 6 | "moving-average", 7 | "sample", 8 | "sdk", 9 | "transaction-stream", 10 | ] 11 | 12 | [workspace.package] 13 | authors = ["Aptos Labs "] 14 | edition = "2021" 15 | homepage = "https://aptoslabs.com" 16 | license = "Apache-2.0" 17 | publish = false 18 | repository = "https://github.com/aptos-labs/aptos-indexer-processor-sdk" 19 | rust-version = "1.78" 20 | 21 | [workspace.dependencies] 22 | aptos-indexer-processor-sdk = { path = "sdk" } 23 | aptos-indexer-transaction-stream = { path = "transaction-stream" } 24 | instrumented-channel = { path = "instrumented-channel" } 25 | aptos-moving-average = { path = "moving-average" } 26 | sample = { path = "sample" } 27 | 28 | ahash = { version = "0.8.7", features = ["serde"] } 29 | anyhow = "1.0.98" 30 | aptos-protos = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2c0d9dd8fd1c27c10bc264a66917d86e8b0467b5" } 31 | aptos-system-utils = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2c0d9dd8fd1c27c10bc264a66917d86e8b0467b5" } 32 | aptos-transaction-filter = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2c0d9dd8fd1c27c10bc264a66917d86e8b0467b5" } 33 | async-trait = "0.1.80" 34 | autometrics = { version = "1.0.1", features = 
["prometheus-exporter"] } 35 | axum = "0.7.5" 36 | backtrace = "0.3.58" 37 | bcs = { git = "https://github.com/aptos-labs/bcs.git", rev = "d31fab9d81748e2594be5cd5cdf845786a30562d" } 38 | bigdecimal = { version = "0.4.0", features = ["serde"] } 39 | chrono = { version = "0.4.19", features = ["clock", "serde"] } 40 | clap = { version = "4.3.5", features = ["derive", "unstable-styles"] } 41 | # Do NOT enable the postgres feature here, it is conditionally enabled in a feature 42 | # block in the Cargo.toml file for the processor crate. 43 | # https://github.com/aptos-labs/aptos-indexer-processors/pull/325 44 | diesel = { version = "~2.3", features = [ 45 | "chrono", 46 | "postgres_backend", 47 | "numeric", 48 | "serde_json", 49 | ] } 50 | diesel-async = { version = "0.7.4", features = [ 51 | "async-connection-wrapper", 52 | "postgres", 53 | "bb8", 54 | "tokio", 55 | ] } 56 | diesel_migrations = { version = "2.1.0", features = ["postgres"] } 57 | delegate = "0.12.0" 58 | derive_builder = "0.20.0" 59 | field_count = "0.1.1" 60 | futures = "0.3.30" 61 | futures-util = "0.3.21" 62 | hex = "0.4.3" 63 | indexmap = { version = "2.7.0", features = ["serde"] } 64 | itertools = "0.13.0" 65 | 66 | # Locking this because newer versions of kanal are using the unstable feature error_in_core, which 67 | # will break the Aptos CLI. 68 | kanal = "=0.1.0-pre8" 69 | lazy_static = "1.4.0" 70 | mockall = "0.12.1" 71 | num_cpus = "1.16.0" 72 | once_cell = { version = "1.19.0" } 73 | petgraph = "0.6.5" 74 | prometheus = "0.13.3" 75 | prometheus-client = "0.22.2" 76 | prost = { version = "0.13.4", features = ["no-recursion-limit"] } 77 | rayon = "1.10.0" 78 | serde = { version = "1.0.193", features = ["derive", "rc"] } 79 | serde_json = { version = "1.0.81", features = ["preserve_order"] } 80 | serde_yaml = "0.8.24" 81 | sha2 = "0.9.3" 82 | strum = { version = "0.24.1", features = ["derive"] } 83 | tempfile = "3.3.0" 84 | testcontainers = "0.20.1" 85 | thiserror = "1.0.61" 86 | tiny-keccak = { version = "2.0.2", features = ["keccak", "sha3"] } 87 | tracing = "0.1.34" 88 | tokio = { version = "1.37.0", features = [ 89 | "rt", 90 | "rt-multi-thread", 91 | "sync", 92 | "time", 93 | "macros", 94 | ] } 95 | tokio-retry = { version = "0.3.0" } 96 | toml = "0.7.4" 97 | tonic = { version = "0.12.3", features = [ 98 | "tls", 99 | "tls-roots", 100 | "transport", 101 | "prost", 102 | "codegen", 103 | "zstd", 104 | ] } 105 | tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] } 106 | url = { version = "2.5.1", features = ["serde"] } 107 | 108 | # Postgres SSL support 109 | native-tls = "0.2.11" 110 | postgres-native-tls = "0.5.0" 111 | tokio-postgres = "0.7.10" 112 | tokio-stream = { version = "0.1.16", features = ["net"] } 113 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/runnable_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{traits::NamedStep, types::transaction_context::TransactionContext}; 2 | use instrumented_channel::{InstrumentedAsyncReceiver, InstrumentedAsyncSender}; 3 | use std::marker::PhantomData; 4 | use tokio::task::JoinHandle; 5 | 6 | pub trait RunnableStep: NamedStep 7 | where 8 | Self: Send + Sized + 'static, 9 | Input: Send + 'static, 10 | Output: Send + 'static, 11 | { 12 | #[allow(clippy::too_long_first_doc_paragraph)] 13 | /// Runs the step, forever, with the given input receiver and returns the output receiver and the join handle. 
14 | fn spawn( 15 | self, 16 | input_receiver: Option>>, 17 | output_channel_size: usize, 18 | _input_sender: Option>>, 19 | ) -> ( 20 | InstrumentedAsyncReceiver>, 21 | JoinHandle<()>, 22 | ); 23 | 24 | fn add_input_receiver( 25 | self, 26 | input_receiver: InstrumentedAsyncReceiver>, 27 | ) -> RunnableStepWithInputReceiver { 28 | RunnableStepWithInputReceiver::new(input_receiver, self) 29 | } 30 | 31 | fn type_name(&self) -> String { 32 | ::type_name(self) 33 | } 34 | } 35 | 36 | pub struct RunnableStepWithInputReceiver 37 | where 38 | Input: Send + 'static, 39 | Output: Send + 'static, 40 | Step: RunnableStep, 41 | { 42 | pub input_receiver: InstrumentedAsyncReceiver>, 43 | pub step: Step, 44 | _output: PhantomData, 45 | pub _input_sender: Option>>, 46 | } 47 | 48 | impl RunnableStepWithInputReceiver 49 | where 50 | Input: Send + 'static, 51 | Output: Send + 'static, 52 | Step: RunnableStep, 53 | { 54 | pub fn new( 55 | input_receiver: InstrumentedAsyncReceiver>, 56 | step: Step, 57 | ) -> Self { 58 | Self { 59 | input_receiver, 60 | step, 61 | _output: Default::default(), 62 | _input_sender: None, 63 | } 64 | } 65 | 66 | #[allow(clippy::too_long_first_doc_paragraph)] 67 | /// This should only be used for the inputless first step to keep the async sender in scope so the channel stays alive. 68 | pub fn add_input_sender( 69 | mut self, 70 | _input_sender: InstrumentedAsyncSender>, 71 | ) -> Self { 72 | self._input_sender = Some(_input_sender); 73 | self 74 | } 75 | } 76 | 77 | impl NamedStep for RunnableStepWithInputReceiver 78 | where 79 | Input: 'static + Send, 80 | Output: 'static + Send, 81 | Step: RunnableStep, 82 | { 83 | fn name(&self) -> String { 84 | self.step.name() 85 | } 86 | 87 | fn type_name(&self) -> String { 88 | format!( 89 | "{} (via RunnableStepWithInputReceiver)", 90 | RunnableStep::type_name(&self.step) 91 | ) 92 | } 93 | } 94 | 95 | impl RunnableStep 96 | for RunnableStepWithInputReceiver 97 | where 98 | Input: Send + 'static, 99 | Output: Send + 'static, 100 | Step: RunnableStep, 101 | { 102 | fn spawn( 103 | self, 104 | input_receiver: Option>>, 105 | channel_size: usize, 106 | _input_sender: Option>>, 107 | ) -> ( 108 | InstrumentedAsyncReceiver>, 109 | JoinHandle<()>, 110 | ) { 111 | if input_receiver.is_some() { 112 | panic!("Input receiver already set for {:?}", self.name()); 113 | } 114 | self.step 115 | .spawn(Some(self.input_receiver), channel_size, _input_sender) 116 | } 117 | 118 | fn add_input_receiver( 119 | self, 120 | _input_receiver: InstrumentedAsyncReceiver>, 121 | ) -> RunnableStepWithInputReceiver { 122 | panic!("Input receiver already set for {:?}", self.name()); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/src/channel_metrics.rs: -------------------------------------------------------------------------------- 1 | use derive_builder::Builder; 2 | use once_cell::sync::Lazy; 3 | use prometheus_client::{ 4 | encoding::EncodeLabelSet, 5 | metrics::{counter::Counter, family::Family, gauge::Gauge}, 6 | registry::Registry, 7 | }; 8 | use std::sync::atomic::AtomicU64; 9 | 10 | pub const METRICS_PREFIX: &str = "aptos_procsdk_channel_"; 11 | 12 | pub fn init_channel_metrics_registry(registry: &mut Registry) { 13 | registry.register( 14 | format!("{}_{}", METRICS_PREFIX, "sent_messages_count"), 15 | "Number of messages sent", 16 | SENT_MESSAGES_COUNT.clone(), 17 | ); 18 | 19 | registry.register( 20 | format!("{}_{}", METRICS_PREFIX, 
"received_messages_count"), 21 | "Number of messages received", 22 | RECEIVED_MESSAGES_COUNT.clone(), 23 | ); 24 | 25 | registry.register( 26 | format!("{}_{}", METRICS_PREFIX, "send_duration"), 27 | "Duration in seconds to send a message", 28 | SEND_DURATION.clone(), 29 | ); 30 | 31 | registry.register( 32 | format!("{}_{}", METRICS_PREFIX, "receive_duration"), 33 | "Duration in seconds to receive a message", 34 | RECEIVE_DURATION.clone(), 35 | ); 36 | 37 | registry.register( 38 | format!("{}_{}", METRICS_PREFIX, "failed_sends_count"), 39 | "Number of failed sends", 40 | FAILED_SENDS_COUNT.clone(), 41 | ); 42 | 43 | registry.register( 44 | format!("{}_{}", METRICS_PREFIX, "failed_receives_count"), 45 | "Number of failed receives", 46 | FAILED_RECEIVES_COUNT.clone(), 47 | ); 48 | 49 | registry.register( 50 | format!("{}_{}", METRICS_PREFIX, "channel_size"), 51 | "Size of the channel", 52 | CHANNEL_SIZE.clone(), 53 | ); 54 | } 55 | 56 | #[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] 57 | pub struct ChannelMetricLabels { 58 | pub output_of: String, 59 | } 60 | 61 | pub static SENT_MESSAGES_COUNT: Lazy> = 62 | Lazy::new(Family::::default); 63 | 64 | pub static RECEIVED_MESSAGES_COUNT: Lazy> = 65 | Lazy::new(Family::::default); 66 | 67 | pub static SEND_DURATION: Lazy>> = 68 | Lazy::new(Family::>::default); 69 | 70 | pub static RECEIVE_DURATION: Lazy>> = 71 | Lazy::new(Family::>::default); 72 | 73 | pub static FAILED_SENDS_COUNT: Lazy> = 74 | Lazy::new(Family::::default); 75 | 76 | pub static FAILED_RECEIVES_COUNT: Lazy> = 77 | Lazy::new(Family::::default); 78 | 79 | pub static CHANNEL_SIZE: Lazy> = 80 | Lazy::new(Family::::default); 81 | 82 | #[derive(Builder, Clone)] 83 | pub struct ChannelMetrics { 84 | pub labels: ChannelMetricLabels, 85 | } 86 | 87 | impl ChannelMetrics { 88 | pub fn new(output_of: String) -> Self { 89 | Self { 90 | labels: ChannelMetricLabels { output_of }, 91 | } 92 | } 93 | } 94 | 95 | impl ChannelMetrics { 96 | pub fn inc_sent_messages_count(&self) -> &Self { 97 | SENT_MESSAGES_COUNT.get_or_create(&self.labels).inc(); 98 | self 99 | } 100 | 101 | pub fn inc_received_messages_count(&self) -> &Self { 102 | RECEIVED_MESSAGES_COUNT.get_or_create(&self.labels).inc(); 103 | self 104 | } 105 | 106 | pub fn inc_failed_sends_count(&self) -> &Self { 107 | FAILED_SENDS_COUNT.get_or_create(&self.labels).inc(); 108 | self 109 | } 110 | 111 | pub fn inc_failed_receives_count(&self) -> &Self { 112 | FAILED_RECEIVES_COUNT.get_or_create(&self.labels).inc(); 113 | self 114 | } 115 | 116 | pub fn log_send_duration(&self, duration: f64) -> &Self { 117 | SEND_DURATION.get_or_create(&self.labels).set(duration); 118 | self 119 | } 120 | 121 | pub fn log_receive_duration(&self, duration: f64) -> &Self { 122 | RECEIVE_DURATION.get_or_create(&self.labels).set(duration); 123 | self 124 | } 125 | 126 | pub fn log_channel_size(&self, size: u64) -> &Self { 127 | CHANNEL_SIZE.get_or_create(&self.labels).set(size as i64); 128 | self 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/mock_grpc.rs: -------------------------------------------------------------------------------- 1 | use aptos_protos::indexer::v1::{ 2 | raw_data_server::{RawData, RawDataServer}, 3 | GetTransactionsRequest, ProcessedRange, TransactionsResponse, 4 | }; 5 | use futures::Stream; 6 | use std::{collections::HashMap, pin::Pin}; 7 | use tokio::time::{timeout, Duration}; 8 | use 
tokio_stream::wrappers::TcpListenerStream; 9 | use tonic::{transport::Server, Request, Response, Status}; 10 | 11 | // Bind to port 0 to get a random available port 12 | const GRPC_ADDRESS: &str = "127.0.0.1:0"; 13 | 14 | #[derive(Default)] 15 | pub struct MockGrpcServer { 16 | pub transactions_response: Vec, 17 | pub chain_id: u64, 18 | } 19 | 20 | type ResponseStream = Pin> + Send>>; 21 | 22 | #[tonic::async_trait] 23 | impl RawData for MockGrpcServer { 24 | type GetTransactionsStream = ResponseStream; 25 | 26 | async fn get_transactions( 27 | &self, 28 | req: Request, 29 | ) -> Result, Status> { 30 | let request = req.into_inner(); 31 | let starting_version = request.starting_version.unwrap_or(0); // Default to 0 if starting_version is not provided 32 | let transactions_count = request.transactions_count.unwrap_or(1); // Default to 1 if transactions_count is not provided 33 | let mut collected_transactions = Vec::new(); 34 | 35 | let mut transaction_map = HashMap::new(); 36 | for transaction_response in &self.transactions_response { 37 | for tx in &transaction_response.transactions { 38 | transaction_map.insert(tx.version, tx.clone()); 39 | } 40 | } 41 | 42 | let mut sorted_transactions: Vec<_> = transaction_map 43 | .iter() 44 | .filter(|(&version, _)| version >= starting_version) 45 | .map(|(_, tx)| tx.clone()) 46 | .collect(); 47 | sorted_transactions.sort_by_key(|tx| tx.version); 48 | 49 | collected_transactions.extend( 50 | sorted_transactions 51 | .into_iter() 52 | .take(transactions_count as usize), 53 | ); 54 | 55 | let result = if !collected_transactions.is_empty() { 56 | TransactionsResponse { 57 | transactions: collected_transactions, 58 | chain_id: Some(self.chain_id), 59 | processed_range: Some(ProcessedRange { 60 | first_version: starting_version, 61 | last_version: starting_version + transactions_count - 1, 62 | }), 63 | } 64 | } else { 65 | // Return a default response with chain_id if no transactions are found 66 | let mut default_transaction_response = self.transactions_response[0].clone(); 67 | default_transaction_response.chain_id = Some(self.chain_id); 68 | default_transaction_response 69 | }; 70 | 71 | let stream = futures::stream::iter(vec![Ok(result)]); 72 | Ok(Response::new(Box::pin(stream))) 73 | } 74 | } 75 | 76 | impl MockGrpcServer { 77 | pub async fn run(self) -> anyhow::Result { 78 | let listener = tokio::net::TcpListener::bind(GRPC_ADDRESS).await?; 79 | let bound_addr = listener.local_addr()?; // Get the actual bound address 80 | 81 | // Convert the TcpListener into a TcpListenerStream (wrapping it with `?` to handle potential errors) 82 | let stream = TcpListenerStream::new(listener); 83 | 84 | // Build and start the gRPC server without graceful shutdown 85 | let server = Server::builder().add_service( 86 | RawDataServer::new(self) 87 | .accept_compressed(tonic::codec::CompressionEncoding::Zstd) // Enable compression for incoming requests 88 | .send_compressed(tonic::codec::CompressionEncoding::Zstd), // Compress outgoing responses 89 | ); 90 | 91 | tokio::spawn(async move { 92 | // This server will run until the process is killed or the task is stopped 93 | let server_timeout = Duration::from_secs(60); 94 | 95 | match timeout(server_timeout, server.serve_with_incoming(stream)).await { 96 | Ok(result) => match result { 97 | Ok(_) => { 98 | println!("Server stopped successfully."); 99 | }, 100 | Err(e) => { 101 | eprintln!("Failed to run gRPC server: {e:?}"); 102 | }, 103 | }, 104 | Err(_) => { 105 | eprintln!("Server timed out and was stopped."); 106 | 
}, 107 | } 108 | }); 109 | 110 | // Return the port number so it can be used by other parts of the program 111 | let port = bound_addr.port(); 112 | println!("Server is running on port {port}",); 113 | 114 | Ok(port) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/cli_parser.rs: -------------------------------------------------------------------------------- 1 | use once_cell::sync::Lazy; 2 | use std::sync::Mutex; 3 | 4 | #[derive(Debug, Clone)] 5 | pub struct TestArgs { 6 | pub generate_output: bool, 7 | pub output_path: Option, 8 | } 9 | 10 | // Define a global static to store the parsed arguments 11 | static TEST_CONFIG: Lazy> = Lazy::new(|| { 12 | let args = parse_test_args(); 13 | Mutex::new(args) 14 | }); 15 | 16 | // Function to fetch global test args 17 | pub fn get_test_config() -> (bool, Option) { 18 | let test_args = TEST_CONFIG.lock().unwrap().clone(); 19 | (test_args.generate_output, test_args.output_path) 20 | } 21 | 22 | pub fn parse_test_args() -> TestArgs { 23 | let raw_args: Vec = std::env::args().collect(); 24 | 25 | // Find the "--" separator, or default to include all args after the test name 26 | let clap_args_position = raw_args.iter().position(|arg| arg == "--"); 27 | 28 | // Determine the starting position for custom arguments 29 | let custom_args_start = match clap_args_position { 30 | Some(position) => position + 1, // Start after the "--" if it exists 31 | None => 1, // Start after the test name, skip the first element 32 | }; 33 | 34 | // Collect custom arguments based on determined start position 35 | let custom_args: Vec = raw_args[custom_args_start..].to_vec(); 36 | 37 | // Manually parse the "generate" flag 38 | let generate_flag = custom_args.contains(&"generate".to_string()); 39 | 40 | // Manually parse the "--output-path" flag and get its associated value 41 | let output_path = custom_args 42 | .windows(2) 43 | .find(|args| args[0] == "output-path") 44 | .map(|args| args[1].clone()); 45 | 46 | println!("Parsed generate flag: {generate_flag}",); 47 | println!( 48 | "Parsed output_path: {}", 49 | output_path.clone().unwrap_or_else(|| "None".to_string()) 50 | ); 51 | 52 | TestArgs { 53 | generate_output: generate_flag, 54 | output_path, 55 | } 56 | } 57 | 58 | #[cfg(test)] 59 | mod tests { 60 | use super::*; 61 | pub fn parse_test_args_from_vec(args: Vec) -> TestArgs { 62 | // Find the "--" separator (if it exists) 63 | let clap_args_position = args.iter().position(|arg| arg == "--"); 64 | 65 | // Only pass the arguments that come after "--", if it exists 66 | let custom_args: Vec = match clap_args_position { 67 | Some(position) => args[position + 1..].to_vec(), // Slice after `--` 68 | None => Vec::new(), // If no `--` is found, treat as no custom args 69 | }; 70 | 71 | // Manually parse the "--generate" flag 72 | let generate_output_flag = custom_args.contains(&"generate".to_string()); 73 | 74 | // Manually parse the "--output-path" flag and get its associated value 75 | let output_path = custom_args 76 | .windows(2) 77 | .find(|args| args[0] == "output-path") 78 | .map(|args| args[1].clone()); 79 | 80 | println!("Parsed generate_output_flag: {generate_output_flag}"); 81 | println!( 82 | "Parsed output_path: {}", 83 | output_path.clone().unwrap_or_else(|| "None".to_string()) 84 | ); 85 | 86 | TestArgs { 87 | generate_output: generate_output_flag, 88 | output_path, 89 | } 90 | } 91 | 92 | #[test] 93 | fn test_parse_generate_output_flag() { 94 | 
let args = vec![ 95 | "test_binary".to_string(), 96 | "--".to_string(), 97 | "generate".to_string(), 98 | ]; 99 | let parsed = parse_test_args_from_vec(args); 100 | assert!(parsed.generate_output); 101 | assert_eq!(parsed.output_path, None); 102 | } 103 | 104 | #[test] 105 | fn test_parse_output_path() { 106 | let args = vec![ 107 | "test_binary".to_string(), 108 | "--".to_string(), 109 | "output-path".to_string(), 110 | "/some/path".to_string(), 111 | ]; 112 | let parsed = parse_test_args_from_vec(args); 113 | assert!(!parsed.generate_output); 114 | assert_eq!(parsed.output_path, Some("/some/path".to_string())); 115 | } 116 | 117 | #[test] 118 | fn test_parse_both_arguments() { 119 | let args = vec![ 120 | "test_binary".to_string(), 121 | "--".to_string(), 122 | "generate".to_string(), 123 | "output-path".to_string(), 124 | "/some/other/path".to_string(), 125 | ]; 126 | let parsed = parse_test_args_from_vec(args); 127 | assert!(parsed.generate_output); 128 | assert_eq!(parsed.output_path, Some("/some/other/path".to_string())); 129 | } 130 | 131 | #[test] 132 | fn test_parse_no_arguments() { 133 | let args = vec!["test_binary".to_string()]; 134 | let parsed = parse_test_args_from_vec(args); 135 | assert!(!parsed.generate_output); 136 | assert_eq!(parsed.output_path, None); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/checkpoint.rs: -------------------------------------------------------------------------------- 1 | use super::database::{execute_with_better_error, execute_with_better_error_conn, ArcDbPool}; 2 | use crate::{ 3 | aptos_indexer_transaction_stream::{utils::time::parse_timestamp, TransactionStreamConfig}, 4 | common_steps::ProcessorStatusSaver, 5 | postgres::{ 6 | models::{ 7 | ledger_info::LedgerInfo, 8 | processor_status::{ProcessorStatus, ProcessorStatusQuery}, 9 | }, 10 | processor_metadata_schema::processor_metadata::{ledger_infos, processor_status}, 11 | }, 12 | types::transaction_context::TransactionContext, 13 | utils::{chain_id_check::ChainIdChecker, errors::ProcessorError}, 14 | }; 15 | use anyhow::{Context, Result}; 16 | use async_trait::async_trait; 17 | use diesel::{query_dsl::methods::FilterDsl, upsert::excluded, ExpressionMethods}; 18 | 19 | /// A trait implementation of ChainIdChecker for Postgres. 20 | pub struct PostgresChainIdChecker { 21 | pub db_pool: ArcDbPool, 22 | } 23 | 24 | impl PostgresChainIdChecker { 25 | pub fn new(db_pool: ArcDbPool) -> Self { 26 | Self { db_pool } 27 | } 28 | } 29 | 30 | #[async_trait] 31 | impl ChainIdChecker for PostgresChainIdChecker { 32 | async fn save_chain_id(&self, chain_id: u64) -> Result<()> { 33 | let mut conn = self 34 | .db_pool 35 | .get() 36 | .await 37 | .context("Error getting db connection")?; 38 | execute_with_better_error_conn( 39 | &mut conn, 40 | diesel::insert_into(ledger_infos::table) 41 | .values(LedgerInfo { 42 | chain_id: chain_id as i64, 43 | }) 44 | .on_conflict_do_nothing(), 45 | ) 46 | .await 47 | .context("Error updating chain_id!")?; 48 | Ok(()) 49 | } 50 | 51 | async fn get_chain_id(&self) -> Result> { 52 | let mut conn = self.db_pool.get().await?; 53 | let maybe_existing_chain_id = LedgerInfo::get(&mut conn) 54 | .await? 55 | .map(|li| li.chain_id as u64); 56 | Ok(maybe_existing_chain_id) 57 | } 58 | } 59 | 60 | /// A trait implementation of ProcessorStatusSaver for Postgres. 
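///
/// Example wiring (a sketch; the pool comes from `new_db_pool`, the processor name
/// from your own config, and "events_processor" is just a placeholder):
///
/// ```ignore
/// let saver = PostgresProcessorStatusSaver::new("events_processor", db_pool.clone());
/// let version_tracker =
///     VersionTrackerStep::new(saver, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS);
/// ```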
61 | pub struct PostgresProcessorStatusSaver { 62 | pub db_pool: ArcDbPool, 63 | pub processor_name: String, 64 | } 65 | 66 | impl PostgresProcessorStatusSaver { 67 | pub fn new(processor_name: &str, db_pool: ArcDbPool) -> Self { 68 | Self { 69 | db_pool, 70 | processor_name: processor_name.to_string(), 71 | } 72 | } 73 | } 74 | 75 | #[async_trait] 76 | impl ProcessorStatusSaver for PostgresProcessorStatusSaver { 77 | async fn save_processor_status( 78 | &self, 79 | last_success_batch: &TransactionContext<()>, 80 | ) -> Result<(), ProcessorError> { 81 | let last_success_version = last_success_batch.metadata.end_version as i64; 82 | let last_transaction_timestamp = last_success_batch 83 | .metadata 84 | .end_transaction_timestamp 85 | .as_ref() 86 | .map(|t| parse_timestamp(t, last_success_batch.metadata.end_version as i64)) 87 | .map(|t| t.naive_utc()); 88 | let status = ProcessorStatus { 89 | processor: self.processor_name.clone(), 90 | last_success_version, 91 | last_transaction_timestamp, 92 | }; 93 | 94 | // Save regular processor status to the database 95 | execute_with_better_error( 96 | self.db_pool.clone(), 97 | diesel::insert_into(processor_status::table) 98 | .values(&status) 99 | .on_conflict(processor_status::processor) 100 | .do_update() 101 | .set(( 102 | processor_status::last_success_version 103 | .eq(excluded(processor_status::last_success_version)), 104 | processor_status::last_updated.eq(excluded(processor_status::last_updated)), 105 | processor_status::last_transaction_timestamp 106 | .eq(excluded(processor_status::last_transaction_timestamp)), 107 | )) 108 | .filter( 109 | processor_status::last_success_version 110 | .le(excluded(processor_status::last_success_version)), 111 | ), 112 | ) 113 | .await?; 114 | Ok(()) 115 | } 116 | } 117 | 118 | pub async fn get_starting_version( 119 | processor_name: &str, 120 | transaction_stream_config: TransactionStreamConfig, 121 | conn_pool: ArcDbPool, 122 | ) -> Result<u64> { 123 | let mut conn = conn_pool.get().await?; 124 | let latest_processed_version = 125 | ProcessorStatusQuery::get_by_processor(processor_name, &mut conn) 126 | .await? 127 | .map(|ps| ps.last_success_version as u64); 128 | // If nothing checkpointed, return the `starting_version` from the config, or 0 if not set. 129 | Ok(latest_processed_version.unwrap_or(transaction_stream_config.starting_version.unwrap_or(0))) 130 | } 131 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/version_tracker_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use anyhow::Result; 9 | use async_trait::async_trait; 10 | use std::marker::PhantomData; 11 | 12 | pub const DEFAULT_UPDATE_PROCESSOR_STATUS_SECS: u64 = 1; 13 | 14 | /// The `ProcessorStatusSaver` trait object should be implemented in order to save the latest successfully 15 | /// processed transaction version to storage. I.e., persisting the `processor_status` to storage. 16 | #[async_trait] 17 | pub trait ProcessorStatusSaver { 18 | // T represents the transaction type that the processor is tracking.
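    // A custom implementation only needs to persist
    // `last_success_batch.metadata.end_version` (and optionally the end timestamp)
    // keyed by the processor name; `PostgresProcessorStatusSaver` in
    // `postgres::utils::checkpoint` is the SDK's Postgres-backed version.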
19 | async fn save_processor_status( 20 | &self, 21 | last_success_batch: &TransactionContext<()>, 22 | ) -> Result<(), ProcessorError>; 23 | } 24 | 25 | /// Tracks the versioned processing of sequential transactions, ensuring no gaps 26 | /// occur between them. 27 | /// 28 | /// Important: this step assumes ordered transactions. Please use the `OrderByVersionStep` before this step 29 | /// if the transactions are not ordered. 30 | pub struct VersionTrackerStep<T, S> 31 | where 32 | Self: Sized + Send + 'static, 33 | T: Send + 'static, 34 | S: ProcessorStatusSaver + Send + 'static, 35 | { 36 | // Last successful batch of sequentially processed transactions. Includes metadata to write to storage. 37 | last_success_batch: Option<TransactionContext<()>>, 38 | polling_interval_secs: u64, 39 | processor_status_saver: S, 40 | _marker: PhantomData<T>, 41 | } 42 | 43 | impl<T, S> VersionTrackerStep<T, S> 44 | where 45 | Self: Sized + Send + 'static, 46 | T: Send + 'static, 47 | S: ProcessorStatusSaver + Send + 'static, 48 | { 49 | pub fn new(processor_status_saver: S, polling_interval_secs: u64) -> Self { 50 | Self { 51 | last_success_batch: None, 52 | processor_status_saver, 53 | polling_interval_secs, 54 | _marker: PhantomData, 55 | } 56 | } 57 | 58 | async fn save_processor_status(&mut self) -> Result<(), ProcessorError> { 59 | if let Some(last_success_batch) = self.last_success_batch.as_ref() { 60 | self.processor_status_saver 61 | .save_processor_status(last_success_batch) 62 | .await 63 | } else { 64 | Ok(()) 65 | } 66 | } 67 | } 68 | 69 | #[async_trait] 70 | impl<T, S> Processable for VersionTrackerStep<T, S> 71 | where 72 | Self: Sized + Send + 'static, 73 | T: Send + 'static, 74 | S: ProcessorStatusSaver + Send + 'static, 75 | { 76 | type Input = T; 77 | type Output = T; 78 | type RunType = PollableAsyncRunType; 79 | 80 | async fn process( 81 | &mut self, 82 | current_batch: TransactionContext<T>, 83 | ) -> Result<Option<TransactionContext<T>>, ProcessorError> { 84 | // If there's a gap in version, return an error 85 | if let Some(last_success_batch) = self.last_success_batch.as_ref() { 86 | if last_success_batch.metadata.end_version + 1 != current_batch.metadata.start_version { 87 | return Err(ProcessorError::ProcessError { 88 | message: format!( 89 | "Gap detected starting from version: {}", 90 | current_batch.metadata.start_version 91 | ), 92 | }); 93 | } 94 | } 95 | 96 | // Update the last success batch 97 | self.last_success_batch = Some(TransactionContext { 98 | data: (), 99 | metadata: current_batch.metadata.clone(), 100 | }); 101 | 102 | // Pass through 103 | Ok(Some(current_batch)) 104 | } 105 | 106 | async fn cleanup( 107 | &mut self, 108 | ) -> Result<Option<Vec<TransactionContext<T>>>, ProcessorError> { 109 | // If processing or polling ends, save the last successful batch to the database.
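        // (`save_processor_status` is a no-op while `last_success_batch` is still `None`.)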
110 | self.save_processor_status().await?; 111 | Ok(None) 112 | } 113 | } 114 | 115 | #[async_trait] 116 | impl PollableAsyncStep for VersionTrackerStep 117 | where 118 | Self: Sized + Send + Sync + 'static, 119 | T: Send + Sync + 'static, 120 | S: ProcessorStatusSaver + Send + Sync + 'static, 121 | { 122 | fn poll_interval(&self) -> std::time::Duration { 123 | std::time::Duration::from_secs(self.polling_interval_secs) 124 | } 125 | 126 | async fn poll(&mut self) -> Result>>, ProcessorError> { 127 | // TODO: Add metrics for gap count 128 | self.save_processor_status().await?; 129 | // Nothing should be returned 130 | Ok(None) 131 | } 132 | } 133 | 134 | impl NamedStep for VersionTrackerStep 135 | where 136 | Self: Sized + Send + 'static, 137 | T: Send + 'static, 138 | S: ProcessorStatusSaver + Send + 'static, 139 | { 140 | fn name(&self) -> String { 141 | format!("VersionTrackerStep: {}", std::any::type_name::()) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/convert.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Helpers related to basic conversion like string manipulation, converting between 5 | //! number types, BCS, and hashing. 6 | 7 | use bigdecimal::{BigDecimal, Signed, ToPrimitive, Zero}; 8 | use serde::{Deserialize, Deserializer}; 9 | use serde_json::Value; 10 | use std::str::FromStr; 11 | use tiny_keccak::{Hasher, Sha3}; 12 | 13 | #[allow(clippy::too_long_first_doc_paragraph)] 14 | /// Standardizes an address / table handle to be a string with length 66 (0x+64 length hex string). 15 | pub fn standardize_address(handle: &str) -> String { 16 | if let Some(handle) = handle.strip_prefix("0x") { 17 | format!("0x{handle:0>64}") 18 | } else { 19 | format!("0x{handle:0>64}") 20 | } 21 | } 22 | 23 | #[allow(clippy::too_long_first_doc_paragraph)] 24 | /// Standardizes an address / table handle to be a string with length 66 (0x+64 length hex string). 25 | pub fn standardize_address_from_bytes(bytes: &[u8]) -> String { 26 | let encoded_bytes = hex::encode(bytes); 27 | standardize_address(&encoded_bytes) 28 | } 29 | 30 | /// Convert a hex string into a raw byte string. Any leading 0x will be stripped. 31 | pub fn hex_to_raw_bytes(val: &str) -> anyhow::Result> { 32 | Ok(hex::decode(val.strip_prefix("0x").unwrap_or(val))?) 33 | } 34 | 35 | /// Truncate a string to a maximum number of characters. 36 | pub fn truncate_str(val: &str, max_chars: usize) -> String { 37 | let mut trunc = val.to_string(); 38 | trunc.truncate(max_chars); 39 | trunc 40 | } 41 | 42 | pub fn sha3_256(buffer: &[u8]) -> [u8; 32] { 43 | let mut output = [0; 32]; 44 | let mut sha3 = Sha3::v256(); 45 | sha3.update(buffer); 46 | sha3.finalize(&mut output); 47 | output 48 | } 49 | 50 | pub fn u64_to_bigdecimal(val: u64) -> BigDecimal { 51 | BigDecimal::from(val) 52 | } 53 | 54 | pub fn bigdecimal_to_u64(val: &BigDecimal) -> u64 { 55 | val.to_u64().expect("Unable to convert big decimal to u64") 56 | } 57 | 58 | pub fn ensure_not_negative(val: BigDecimal) -> BigDecimal { 59 | if val.is_negative() { 60 | return BigDecimal::zero(); 61 | } 62 | val 63 | } 64 | 65 | /// Remove null bytes from a JSON object. 
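///
/// Illustrative behaviour (a sketch, not a doctest from the crate): embedded
/// `\u{0000}` characters are stripped from every string value, recursively.
///
/// ```ignore
/// let cleaned: serde_json::Value =
///     remove_null_bytes(&serde_json::json!({ "name": "ab\u{0000}c" }));
/// assert_eq!(cleaned, serde_json::json!({ "name": "abc" }));
/// ```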
66 | pub fn remove_null_bytes serde::Deserialize<'de>>(input: &T) -> T { 67 | let mut txn_json = serde_json::to_value(input).unwrap(); 68 | recurse_remove_null_bytes_from_json(&mut txn_json); 69 | serde_json::from_value::(txn_json).unwrap() 70 | } 71 | 72 | fn recurse_remove_null_bytes_from_json(sub_json: &mut Value) { 73 | match sub_json { 74 | Value::Array(array) => { 75 | for item in array { 76 | recurse_remove_null_bytes_from_json(item); 77 | } 78 | }, 79 | Value::Object(object) => { 80 | for (_key, value) in object { 81 | recurse_remove_null_bytes_from_json(value); 82 | } 83 | }, 84 | Value::String(str) => { 85 | if !str.is_empty() { 86 | let replacement = string_null_byte_replacement(str); 87 | *str = replacement; 88 | } 89 | }, 90 | _ => {}, 91 | } 92 | } 93 | 94 | fn string_null_byte_replacement(value: &str) -> String { 95 | value.replace('\u{0000}', "").replace("\\u0000", "") 96 | } 97 | 98 | pub fn deserialize_string_from_hexstring<'de, D>( 99 | deserializer: D, 100 | ) -> core::result::Result 101 | where 102 | D: Deserializer<'de>, 103 | { 104 | let s = ::deserialize(deserializer)?; 105 | Ok(String::from_utf8(hex_to_raw_bytes(&s).unwrap()).unwrap_or(s)) 106 | } 107 | 108 | /// Deserialize from string to type T 109 | pub fn deserialize_from_string<'de, D, T>(deserializer: D) -> Result 110 | where 111 | D: Deserializer<'de>, 112 | T: FromStr, 113 | ::Err: std::fmt::Display, 114 | { 115 | use serde::de::Error; 116 | 117 | let s = ::deserialize(deserializer)?; 118 | s.parse::().map_err(D::Error::custom) 119 | } 120 | 121 | /// Convert the bcs serialized vector to its original string format 122 | pub fn convert_bcs_hex(typ: String, value: String) -> Option { 123 | let decoded = hex::decode(value.strip_prefix("0x").unwrap_or(&*value)).ok()?; 124 | 125 | match typ.as_str() { 126 | "0x1::string::String" => bcs::from_bytes::(decoded.as_slice()), 127 | "u8" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 128 | "u64" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 129 | "u128" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 130 | "bool" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 131 | "address" => bcs::from_bytes::(decoded.as_slice()).map(|e| format!("0x{e}")), 132 | _ => Ok(value), 133 | } 134 | .ok() 135 | } 136 | 137 | /// Convert the bcs serialized vector to its original string format for token v2 property map. 
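///
/// Illustrative example (not a doctest from the crate): type code `4` maps to `u64`
/// in the match below, and BCS encodes `1u64` as eight little-endian bytes.
///
/// ```ignore
/// assert_eq!(
///     convert_bcs_hex_new(4, "0x0100000000000000".to_string()),
///     Some("1".to_string())
/// );
/// ```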
138 | pub fn convert_bcs_hex_new(typ: u8, value: String) -> Option<String> { 139 | let decoded = hex::decode(value.strip_prefix("0x").unwrap_or(&*value)).ok()?; 140 | 141 | match typ { 142 | 0 /* bool */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 143 | 1 /* u8 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 144 | 2 /* u16 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 145 | 3 /* u32 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 146 | 4 /* u64 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 147 | 5 /* u128 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 148 | 6 /* u256 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 149 | 7 /* address */ => bcs::from_bytes::(decoded.as_slice()).map(|e| format!("0x{e}")), 150 | 8 /* byte_vector */ => bcs::from_bytes::>(decoded.as_slice()).map(|e| format!("0x{}", hex::encode(e))), 151 | 9 /* string */ => bcs::from_bytes::(decoded.as_slice()), 152 | _ => Ok(value), 153 | } 154 | .ok() 155 | } 156 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sample/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | sync::atomic::{AtomicU64, Ordering}, 3 | time::{Duration, SystemTime}, 4 | }; 5 | 6 | /// ## Sampling logs 7 | /// 8 | /// Sometimes logging a large amount of data is expensive. In order to log information only part 9 | /// of the time, we've added a `sample!` macro that's configurable on how often we want to execute some code. 10 | /// 11 | /// `SampleRate` determines how often the sampled statement will occur. 12 | /// 13 | /// ``` 14 | /// use sample::{sample, SampleRate, Sampling}; 15 | /// use std::time::Duration; 16 | /// use tracing::info; 17 | /// 18 | /// // Sampled based on frequency of events, log only every 2 logs 19 | /// sample!(SampleRate::Frequency(2), info!("Long log")); 20 | /// 21 | /// // Sampled based on time passed, log at most once a minute 22 | /// sample!(SampleRate::Duration(Duration::from_secs(60)), info!("Long log")); 23 | /// ``` 24 | /// The rate at which a `sample!` macro will run its given function 25 | #[derive(Debug)] 26 | pub enum SampleRate { 27 | /// Only sample a single time during a window of time. This rate only has a resolution in 28 | /// seconds. 29 | Duration(Duration), 30 | /// Sample based on the frequency of the event. The provided u64 is the inverse of the 31 | /// frequency (1/x), for example Frequency(2) means that 1 out of every 2 events will be 32 | /// sampled (1/2).
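    /// For example, `SampleRate::Frequency(10)` lets the 1st, 11th, 21st, ... events
    /// through, as exercised by the `frequency` test at the bottom of this file.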
33 | Frequency(u64), 34 | /// Always Sample 35 | Always, 36 | } 37 | 38 | /// An internal struct that can be checked if a sample is ready for the `sample!` macro 39 | pub struct Sampling { 40 | rate: SampleRate, 41 | state: AtomicU64, 42 | } 43 | 44 | impl Sampling { 45 | pub const fn new(rate: SampleRate) -> Self { 46 | Self { 47 | rate, 48 | state: AtomicU64::new(0), 49 | } 50 | } 51 | 52 | pub fn sample(&self) -> bool { 53 | match &self.rate { 54 | SampleRate::Duration(rate) => Self::sample_duration(rate, &self.state), 55 | SampleRate::Frequency(rate) => Self::sample_frequency(*rate, &self.state), 56 | SampleRate::Always => true, 57 | } 58 | } 59 | 60 | fn sample_frequency(rate: u64, count: &AtomicU64) -> bool { 61 | let previous_count = count 62 | .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |count| { 63 | let new_count = if count == 0 { 64 | rate.saturating_sub(1) 65 | } else { 66 | count.saturating_sub(1) 67 | }; 68 | Some(new_count) 69 | }) 70 | .expect("Closure should always returns 'Some'. This is a Bug."); 71 | 72 | previous_count == 0 73 | } 74 | 75 | fn sample_duration(rate: &Duration, last_sample: &AtomicU64) -> bool { 76 | let rate = rate.as_secs(); 77 | // Seconds since Unix Epoch 78 | let now = SystemTime::now() 79 | .duration_since(SystemTime::UNIX_EPOCH) 80 | .expect("SystemTime before UNIX EPOCH!") 81 | .as_secs(); 82 | 83 | last_sample 84 | .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |last_sample| { 85 | if now.saturating_sub(last_sample) >= rate { 86 | Some(now) 87 | } else { 88 | None 89 | } 90 | }) 91 | .is_ok() 92 | } 93 | } 94 | 95 | /// Samples a given function at a `SampleRate`, useful for periodically emitting logs or metrics on 96 | /// high throughput pieces of code. 97 | #[macro_export] 98 | macro_rules! sample { 99 | ($sample_rate:expr, $($args:expr)+ ,) => { 100 | $crate::sample!($sample_rate, $($args)+); 101 | }; 102 | 103 | ($sample_rate:expr, $($args:tt)+) => {{ 104 | static SAMPLING: $crate::Sampling = $crate::Sampling::new($sample_rate); 105 | if SAMPLING.sample() { 106 | $($args)+ 107 | } 108 | }}; 109 | } 110 | 111 | #[cfg(test)] 112 | mod tests { 113 | use super::*; 114 | 115 | #[test] 116 | fn frequency() { 117 | // Frequency 118 | let sampling = Sampling::new(SampleRate::Frequency(10)); 119 | let mut v = Vec::new(); 120 | for i in 0..=25 { 121 | if sampling.sample() { 122 | v.push(i); 123 | } 124 | } 125 | 126 | assert_eq!(v, vec![0, 10, 20]); 127 | } 128 | 129 | #[test] 130 | fn always() { 131 | // Always 132 | let sampling = Sampling::new(SampleRate::Always); 133 | let mut v = Vec::new(); 134 | for i in 0..5 { 135 | if sampling.sample() { 136 | v.push(i); 137 | } 138 | } 139 | 140 | assert_eq!(v, vec![0, 1, 2, 3, 4]); 141 | } 142 | 143 | #[ignore] 144 | #[test] 145 | fn duration() { 146 | // Duration 147 | let sampling = Sampling::new(SampleRate::Duration(Duration::from_secs(1))); 148 | let mut v = Vec::new(); 149 | for i in 0..5 { 150 | if sampling.sample() { 151 | v.push(i); 152 | } 153 | 154 | std::thread::sleep(Duration::from_millis(500)); 155 | } 156 | 157 | assert_eq!(v.len(), 2); 158 | } 159 | 160 | #[test] 161 | fn macro_expansion() { 162 | for i in 0..10 { 163 | sample!( 164 | SampleRate::Frequency(2), 165 | println!("loooooooooooooooooooooooooong hello {}", i), 166 | ); 167 | 168 | sample!(SampleRate::Frequency(2), { 169 | println!("hello {i}"); 170 | }); 171 | 172 | sample!(SampleRate::Frequency(2), println!("hello {i}")); 173 | 174 | sample! 
{ 175 | SampleRate::Frequency(2), 176 | 177 | for j in 10..20 { 178 | println!("hello {j}"); 179 | } 180 | } 181 | } 182 | } 183 | 184 | #[test] 185 | fn threaded() { 186 | fn work() -> usize { 187 | let mut count = 0; 188 | 189 | for _ in 0..1000 { 190 | sample!(SampleRate::Frequency(5), count += 1); 191 | } 192 | 193 | count 194 | } 195 | 196 | let mut handles = Vec::new(); 197 | for _ in 0..10 { 198 | handles.push(std::thread::spawn(work)); 199 | } 200 | 201 | let mut count = 0; 202 | for handle in handles { 203 | count += handle.join().unwrap(); 204 | } 205 | 206 | assert_eq!(count, 2000); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod channel_metrics; 2 | 3 | use channel_metrics::ChannelMetrics; 4 | use delegate::delegate; 5 | /** 6 | 7 | # Instrumented Channel 8 | This is a wrapper and abstraction over the kanal channel (for now), but it can be extended to support other channels as well. 9 | 10 | The main purpose of this crate is to provide a way to instrument the channel, so that we can track the number of messages sent and received, and the time taken to send and receive messages. 11 | 12 | ## Example 13 | ```rust 14 | use instrumented_channel::instrumented_bounded_channel; 15 | use tokio::time::{sleep, Duration}; 16 | 17 | #[tokio::main] 18 | async fn main() { 19 | let (sender, receiver) = instrumented_bounded_channel("channel_name", 10); 20 | sender.send(42).await.unwrap(); 21 | assert_eq!(receiver.recv().await.unwrap(), 42); 22 | } 23 | ``` 24 | **/ 25 | use kanal::{AsyncReceiver, AsyncSender, ReceiveError, SendError}; 26 | 27 | pub struct InstrumentedAsyncSender { 28 | pub(crate) sender: AsyncSender, 29 | // Metrics 30 | pub(crate) channel_metrics: channel_metrics::ChannelMetrics, 31 | } 32 | 33 | impl InstrumentedAsyncSender { 34 | // shared_send_impl methods 35 | delegate! 
{ 36 | to self.sender { 37 | pub fn is_disconnected(&self) -> bool; 38 | pub fn len(&self) -> usize; 39 | pub fn is_empty(&self) -> bool; 40 | pub fn is_full(&self) -> bool; 41 | pub fn capacity(&self); 42 | pub fn receiver_count(&self) -> u32; 43 | pub fn sender_count(&self) -> u32; 44 | pub fn close(&self) -> bool; 45 | pub fn is_closed(&self) -> bool; 46 | } 47 | } 48 | 49 | pub fn new(sender: AsyncSender, output_of: &str) -> Self { 50 | let channel_metrics = ChannelMetrics::new(output_of.to_string()); 51 | 52 | Self { 53 | sender, 54 | channel_metrics, 55 | } 56 | } 57 | 58 | pub async fn send(&'_ self, data: T) -> Result<(), SendError> { 59 | let send_start = std::time::Instant::now(); 60 | let res = self.sender.send(data).await; 61 | let send_duration = send_start.elapsed(); 62 | 63 | if res.is_err() { 64 | self.channel_metrics 65 | .log_send_duration(send_duration.as_secs_f64()) 66 | .log_channel_size(self.sender.len() as u64) 67 | .inc_failed_sends_count(); 68 | } else { 69 | self.channel_metrics 70 | .log_send_duration(send_duration.as_secs_f64()) 71 | .log_channel_size(self.sender.len() as u64) 72 | .inc_sent_messages_count(); 73 | } 74 | 75 | res 76 | } 77 | } 78 | 79 | impl Clone for InstrumentedAsyncSender { 80 | fn clone(&self) -> Self { 81 | Self { 82 | sender: self.sender.clone(), 83 | channel_metrics: self.channel_metrics.clone(), 84 | } 85 | } 86 | } 87 | 88 | pub struct InstrumentedAsyncReceiver { 89 | pub(crate) receiver: AsyncReceiver, 90 | // Metrics 91 | pub(crate) channel_metrics: ChannelMetrics, 92 | } 93 | 94 | impl InstrumentedAsyncReceiver { 95 | // shared_recv_impl methods 96 | delegate! { 97 | to self.receiver { 98 | pub fn is_disconnected(&self) -> bool; 99 | pub fn len(&self) -> usize; 100 | pub fn is_empty(&self) -> bool; 101 | pub fn is_full(&self) -> bool; 102 | pub fn capacity(&self); 103 | pub fn receiver_count(&self) -> u32; 104 | pub fn sender_count(&self) -> u32; 105 | pub fn close(&self) -> bool; 106 | pub fn is_closed(&self) -> bool; 107 | } 108 | } 109 | 110 | pub fn new(receiver: AsyncReceiver, output_of: &str) -> Self { 111 | let channel_metrics = ChannelMetrics::new(output_of.to_string()); 112 | Self { 113 | receiver, 114 | channel_metrics, 115 | } 116 | } 117 | 118 | pub async fn recv(&'_ self) -> Result { 119 | let receive_start = std::time::Instant::now(); 120 | let result = self.receiver.recv().await; 121 | let receive_duration = receive_start.elapsed(); 122 | 123 | if result.is_err() { 124 | self.channel_metrics 125 | .log_receive_duration(receive_duration.as_secs_f64()) 126 | .log_channel_size(self.receiver.len() as u64) 127 | .inc_failed_receives_count(); 128 | } else { 129 | self.channel_metrics 130 | .log_receive_duration(receive_duration.as_secs_f64()) 131 | .log_channel_size(self.receiver.len() as u64) 132 | .inc_received_messages_count(); 133 | } 134 | 135 | result 136 | } 137 | } 138 | 139 | impl Clone for InstrumentedAsyncReceiver { 140 | fn clone(&self) -> Self { 141 | Self { 142 | receiver: self.receiver.clone(), 143 | channel_metrics: self.channel_metrics.clone(), 144 | } 145 | } 146 | } 147 | 148 | pub fn instrumented_bounded_channel( 149 | output_of: &str, 150 | size: usize, 151 | ) -> (InstrumentedAsyncSender, InstrumentedAsyncReceiver) { 152 | let (sender, receiver) = kanal::bounded_async(size); 153 | ( 154 | InstrumentedAsyncSender::new(sender, output_of), 155 | InstrumentedAsyncReceiver::new(receiver, output_of), 156 | ) 157 | } 158 | 159 | pub fn instrumented_unbounded_channel( 160 | output_of: &str, 161 | ) -> 
(InstrumentedAsyncSender, InstrumentedAsyncReceiver) { 162 | let (sender, receiver) = kanal::unbounded_async(); 163 | ( 164 | InstrumentedAsyncSender::new(sender, output_of), 165 | InstrumentedAsyncReceiver::new(receiver, output_of), 166 | ) 167 | } 168 | 169 | #[cfg(test)] 170 | mod tests { 171 | use super::*; 172 | use prometheus::Encoder; 173 | 174 | fn gather_metrics_to_string() -> String { 175 | let metrics = prometheus::gather(); 176 | let mut buffer = vec![]; 177 | let encoder = prometheus::TextEncoder::new(); 178 | encoder.encode(&metrics, &mut buffer).unwrap(); 179 | String::from_utf8(buffer).unwrap() 180 | } 181 | #[tokio::test] 182 | #[allow(clippy::needless_return)] 183 | async fn test_instrumented_channel() { 184 | let (sender, receiver) = instrumented_bounded_channel("my_channel", 10); 185 | sender.send(42).await.unwrap(); 186 | sender.send(999).await.unwrap(); 187 | sender.send(3).await.unwrap(); 188 | assert_eq!(receiver.recv().await.unwrap(), 42); 189 | // TODO: check prometheus metrics 190 | let metrics = gather_metrics_to_string(); 191 | println!("{metrics}"); 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/async_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | processable::RunnableStepType, IntoRunnableStep, NamedStep, Processable, RunnableStep, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::step_metrics::{StepMetricLabels, StepMetricsBuilder}, 7 | }; 8 | use async_trait::async_trait; 9 | use bigdecimal::Zero; 10 | use instrumented_channel::{ 11 | instrumented_bounded_channel, InstrumentedAsyncReceiver, InstrumentedAsyncSender, 12 | }; 13 | use std::time::{Duration, Instant}; 14 | use tokio::task::JoinHandle; 15 | use tracing::{error, info, warn}; 16 | 17 | #[async_trait] 18 | pub trait AsyncStep 19 | where 20 | Self: Processable + Send + Sized + 'static, 21 | { 22 | } 23 | 24 | pub struct AsyncRunType; 25 | 26 | impl RunnableStepType for AsyncRunType {} 27 | 28 | pub struct RunnableAsyncStep 29 | where 30 | Step: AsyncStep, 31 | { 32 | pub step: Step, 33 | } 34 | 35 | impl RunnableAsyncStep 36 | where 37 | Step: AsyncStep, 38 | { 39 | pub fn new(step: Step) -> Self { 40 | Self { step } 41 | } 42 | } 43 | 44 | impl NamedStep for RunnableAsyncStep 45 | where 46 | Step: 'static + AsyncStep + Send + Sized, 47 | { 48 | fn name(&self) -> String { 49 | self.step.name() 50 | } 51 | 52 | fn type_name(&self) -> String { 53 | let step_type = std::any::type_name::().to_string(); 54 | format!("{step_type} (via RunnableAsyncStep)",) 55 | } 56 | } 57 | 58 | impl IntoRunnableStep for Step 59 | where 60 | Step: AsyncStep + Send + Sized + 'static, 61 | { 62 | fn into_runnable_step(self) -> impl RunnableStep { 63 | RunnableAsyncStep::new(self) 64 | } 65 | } 66 | 67 | impl RunnableStep for RunnableAsyncStep 68 | where 69 | Step: AsyncStep + Send + Sized + 'static, 70 | { 71 | fn spawn( 72 | self, 73 | input_receiver: Option>>, 74 | output_channel_size: usize, 75 | _input_sender: Option>>, 76 | ) -> ( 77 | InstrumentedAsyncReceiver>, 78 | JoinHandle<()>, 79 | ) { 80 | let mut step = self.step; 81 | let step_name = step.name(); 82 | let input_receiver = input_receiver.expect("Input receiver must be set"); 83 | 84 | let (output_sender, output_receiver) = 85 | instrumented_bounded_channel(&step_name, output_channel_size); 86 | 87 | info!(step_name = step_name, "Spawning processing task"); 88 | let 
handle = tokio::spawn(async move { 89 | loop { 90 | let input_with_context = match input_receiver.recv().await { 91 | Ok(input_with_context) => input_with_context, 92 | Err(e) => { 93 | // If the previous steps have finished and the channels have closed , we should break out of the loop 94 | warn!( 95 | step_name = step_name, 96 | error = e.to_string(), 97 | "No input received from channel" 98 | ); 99 | break; 100 | }, 101 | }; 102 | let processing_duration = Instant::now(); 103 | let output_with_context = match step.process(input_with_context).await { 104 | Ok(output_with_context) => output_with_context, 105 | Err(e) => { 106 | error!( 107 | step_name = step_name, 108 | error = e.to_string(), 109 | "Failed to process input" 110 | ); 111 | break; 112 | }, 113 | }; 114 | if let Some(output_with_context) = output_with_context { 115 | match StepMetricsBuilder::default() 116 | .labels(StepMetricLabels { 117 | step_name: step.name(), 118 | }) 119 | .latest_processed_version(output_with_context.metadata.end_version) 120 | .processed_transaction_latency( 121 | output_with_context.get_transaction_latency(), 122 | ) 123 | .latest_transaction_timestamp( 124 | output_with_context.get_start_transaction_timestamp_unix(), 125 | ) 126 | .num_transactions_processed_count( 127 | output_with_context.get_num_transactions(), 128 | ) 129 | .processing_duration_in_secs(processing_duration.elapsed().as_secs_f64()) 130 | .processed_size_in_bytes(output_with_context.metadata.total_size_in_bytes) 131 | .build() 132 | { 133 | Ok(mut metrics) => metrics.log_metrics(), 134 | Err(e) => { 135 | error!( 136 | step_name = step_name, 137 | error = e.to_string(), 138 | "Failed to log metrics" 139 | ); 140 | break; 141 | }, 142 | } 143 | match output_sender.send(output_with_context).await { 144 | Ok(_) => (), 145 | Err(e) => { 146 | error!( 147 | step_name = step_name, 148 | error = e.to_string(), 149 | "Error sending output to channel" 150 | ); 151 | break; 152 | }, 153 | } 154 | } 155 | } 156 | 157 | // Wait for output channel to be empty before ending the task and closing the send channel 158 | loop { 159 | let channel_size = output_sender.len(); 160 | info!( 161 | step_name = step_name, 162 | channel_size = channel_size, 163 | "Waiting for output channel to be empty" 164 | ); 165 | if channel_size.is_zero() { 166 | break; 167 | } 168 | tokio::time::sleep(Duration::from_millis(100)).await; 169 | } 170 | info!( 171 | step_name = step_name, 172 | "Output channel is empty. Closing send channel." 
173 | ); 174 | }); 175 | 176 | (output_receiver, handle) 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/basic_processor_function.rs: -------------------------------------------------------------------------------- 1 | use super::basic_processor_step::BasicProcessorStep; 2 | use crate::{ 3 | aptos_indexer_transaction_stream::TransactionStreamConfig, 4 | builder::ProcessorBuilder, 5 | common_steps::{ 6 | TransactionStreamStep, VersionTrackerStep, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS, 7 | }, 8 | postgres::{ 9 | progress::PostgresProgressStatusProvider, 10 | subconfigs::postgres_config::PostgresConfig, 11 | utils::{ 12 | checkpoint::{ 13 | get_starting_version, PostgresChainIdChecker, PostgresProcessorStatusSaver, 14 | }, 15 | database::{new_db_pool, run_migrations, ArcDbPool}, 16 | }, 17 | SDK_MIGRATIONS, 18 | }, 19 | server_framework::{ 20 | load, register_probes_and_metrics_handler, setup_logging, setup_panic_handler, 21 | GenericConfig, HealthCheck, ProgressHealthChecker, ProgressHealthConfig, ServerArgs, 22 | }, 23 | traits::IntoRunnableStep, 24 | utils::{chain_id_check::check_or_update_chain_id, errors::ProcessorError}, 25 | }; 26 | use anyhow::Result; 27 | use aptos_protos::transaction::v1::Transaction; 28 | use clap::Parser; 29 | use diesel_migrations::EmbeddedMigrations; 30 | use serde::{Deserialize, Serialize}; 31 | use std::sync::Arc; 32 | use tracing::info; 33 | 34 | #[derive(Clone, Debug, Deserialize, Serialize)] 35 | #[serde(deny_unknown_fields)] 36 | pub struct ProcessConfig { 37 | pub transaction_stream_config: TransactionStreamConfig, 38 | pub postgres_config: PostgresConfig, 39 | /// Optional configuration for progress health checking. 40 | /// If provided, the `/healthz` endpoint will check if the processor is making progress. 41 | #[serde(default)] 42 | pub progress_health_config: Option, 43 | } 44 | 45 | /// Processes transactions with a custom handler function. 46 | pub async fn process( 47 | processor_name: String, 48 | embedded_migrations: EmbeddedMigrations, 49 | process_function: F, 50 | ) -> Result<()> 51 | where 52 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + Sync + 'static, 53 | Fut: std::future::Future> + Send + 'static, 54 | { 55 | let args = ServerArgs::parse(); 56 | setup_logging(); 57 | setup_panic_handler(); 58 | let config = load::>(&args.config_path)?; 59 | let handle = tokio::runtime::Handle::current(); 60 | 61 | let health_port = config.health_check_port; 62 | let additional_labels = config.metrics_config.additional_labels.clone(); 63 | let progress_health_config = config.server_config.progress_health_config.clone(); 64 | 65 | let db_pool = new_db_pool( 66 | &config.server_config.postgres_config.connection_string, 67 | Some(config.server_config.postgres_config.db_pool_size), 68 | ) 69 | .await 70 | .expect("Failed to create connection pool"); 71 | 72 | // Build health checks. 73 | let mut health_checks: Vec> = vec![]; 74 | if let Some(progress_config) = progress_health_config { 75 | let status_provider = 76 | PostgresProgressStatusProvider::new(processor_name.clone(), db_pool.clone()); 77 | let progress_checker = ProgressHealthChecker::new( 78 | processor_name.clone(), 79 | Box::new(status_provider), 80 | progress_config, 81 | ); 82 | health_checks.push(Arc::new(progress_checker)); 83 | } 84 | 85 | // Start health and metrics probes. 
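    // The probe server and the main processing pipeline run as sibling tasks; the
    // `tokio::select!` below returns as soon as either of them exits.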
86 | let task_handler = handle.spawn(async move { 87 | register_probes_and_metrics_handler(health_port, additional_labels, health_checks).await; 88 | anyhow::Ok(()) 89 | }); 90 | let main_task_handler = handle.spawn(async move { 91 | run_processor( 92 | processor_name, 93 | config.server_config.transaction_stream_config, 94 | config.server_config.postgres_config, 95 | embedded_migrations, 96 | db_pool, 97 | process_function, 98 | ) 99 | .await 100 | }); 101 | tokio::select! { 102 | res = task_handler => { 103 | res.expect("Probes and metrics handler unexpectedly exited") 104 | }, 105 | res = main_task_handler => { 106 | res.expect("Main task handler unexpectedly exited") 107 | }, 108 | } 109 | } 110 | 111 | pub async fn run_processor( 112 | processor_name: String, 113 | transaction_stream_config: TransactionStreamConfig, 114 | postgres_config: PostgresConfig, 115 | embedded_migrations: EmbeddedMigrations, 116 | db_pool: ArcDbPool, 117 | process_function: F, 118 | ) -> Result<()> 119 | where 120 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + Sync + 'static, 121 | Fut: std::future::Future> + Send + 'static, 122 | { 123 | // Run user migrations. 124 | run_migrations( 125 | postgres_config.connection_string.clone(), 126 | db_pool.clone(), 127 | embedded_migrations, 128 | ) 129 | .await; 130 | 131 | // Run SDK migrations. 132 | run_migrations( 133 | postgres_config.connection_string.clone(), 134 | db_pool.clone(), 135 | SDK_MIGRATIONS, 136 | ) 137 | .await; 138 | 139 | check_or_update_chain_id( 140 | &transaction_stream_config, 141 | &PostgresChainIdChecker::new(db_pool.clone()), 142 | ) 143 | .await?; 144 | 145 | // Merge the starting version from config and the latest processed version from the DB. 146 | let starting_version = get_starting_version( 147 | processor_name.as_str(), 148 | transaction_stream_config.clone(), 149 | db_pool.clone(), 150 | ) 151 | .await?; 152 | 153 | // Define processor steps. 154 | let transaction_stream_config = transaction_stream_config.clone(); 155 | let transaction_stream = TransactionStreamStep::new(TransactionStreamConfig { 156 | starting_version: Some(starting_version), 157 | ..transaction_stream_config 158 | }) 159 | .await?; 160 | let basic_processor_step = BasicProcessorStep { 161 | process_function, 162 | conn_pool: db_pool.clone(), 163 | }; 164 | let processor_status_saver = 165 | PostgresProcessorStatusSaver::new(processor_name.as_str(), db_pool.clone()); 166 | let version_tracker = 167 | VersionTrackerStep::new(processor_status_saver, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS); 168 | 169 | // Connect processor steps together. 170 | let (_, buffer_receiver) = 171 | ProcessorBuilder::new_with_inputless_first_step(transaction_stream.into_runnable_step()) 172 | .connect_to(basic_processor_step.into_runnable_step(), 10) 173 | .connect_to(version_tracker.into_runnable_step(), 10) 174 | .end_and_return_output_receiver(10); 175 | 176 | // (Optional) Parse the results. 
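    // Draining the final output channel keeps upstream steps from blocking on a full
    // channel; once the transaction stream ends and the channel closes, return cleanly.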
177 | loop { 178 | match buffer_receiver.recv().await { 179 | Ok(_) => {}, 180 | Err(_) => { 181 | info!("Channel is closed"); 182 | return Ok(()); 183 | }, 184 | } 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/order_by_version_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use ahash::AHashMap; 9 | use anyhow::Result; 10 | use async_trait::async_trait; 11 | use std::time::Duration; 12 | 13 | /// OrderByVersionStep is a step that orders TransactionContexts by their starting versions. 14 | /// It buffers ordered TransactionContexts and releases them at every poll_interval. 15 | pub struct OrderByVersionStep 16 | where 17 | Self: Sized + Send + 'static, 18 | Input: Send + 'static, 19 | { 20 | pub ordered_versions: Vec>, 21 | pub unordered_versions: AHashMap>, 22 | pub expected_next_version: u64, 23 | // Duration to poll and return the ordered versions 24 | pub poll_interval: Duration, 25 | } 26 | 27 | impl OrderByVersionStep 28 | where 29 | Self: Sized + Send + 'static, 30 | Input: Send + 'static, 31 | { 32 | pub fn new(starting_version: u64, poll_interval: Duration) -> Self { 33 | Self { 34 | ordered_versions: Vec::new(), 35 | unordered_versions: AHashMap::new(), 36 | expected_next_version: starting_version, 37 | poll_interval, 38 | } 39 | } 40 | 41 | fn update_ordered_versions(&mut self) { 42 | // While there are batches in unordered_versions that are in order, add them to ordered_versions 43 | while let Some(batch) = self 44 | .unordered_versions 45 | .remove(&(self.expected_next_version)) 46 | { 47 | self.expected_next_version = batch.metadata.end_version + 1; 48 | self.ordered_versions.push(batch); 49 | } 50 | } 51 | } 52 | 53 | #[async_trait] 54 | impl Processable for OrderByVersionStep 55 | where 56 | Input: Send + Sync + 'static, 57 | { 58 | type Input = Input; 59 | type Output = Input; 60 | type RunType = PollableAsyncRunType; 61 | 62 | async fn process( 63 | &mut self, 64 | current_batch: TransactionContext, 65 | ) -> Result>, ProcessorError> { 66 | // If there's a gap in the expected_next_version and current_version 67 | // have the current_version to unordered_versions for later processing. 
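// Worked example (mirroring the unit test below): with expected_next_version = 0,
// a batch covering versions [100, 199] arriving first is parked in unordered_versions;
// when the [0, 99] batch arrives it is released immediately, expected_next_version
// advances to 100, and update_ordered_versions() then drains the parked [100, 199] batch.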
68 | if self.expected_next_version != current_batch.metadata.start_version { 69 | tracing::debug!( 70 | next_version = self.expected_next_version, 71 | step = self.name(), 72 | "Gap detected starting from version: {}", 73 | current_batch.metadata.start_version 74 | ); 75 | self.unordered_versions 76 | .insert(current_batch.metadata.start_version, current_batch); 77 | } else { 78 | tracing::debug!("No gap detected"); 79 | self.expected_next_version = current_batch.metadata.end_version + 1; 80 | self.ordered_versions.push(current_batch); 81 | 82 | // If the current_versions is the expected_next_version, update the ordered_versions 83 | self.update_ordered_versions(); 84 | } 85 | 86 | // TODO: Consider adding a metric for the number of unordered_versions for debugging, performance tesing 87 | 88 | // Pass through 89 | Ok(None) // No immediate output 90 | } 91 | 92 | // Once polling ends, release the remaining ordered items in buffer 93 | async fn cleanup( 94 | &mut self, 95 | ) -> Result>>, ProcessorError> { 96 | Ok(Some(std::mem::take(&mut self.ordered_versions))) 97 | } 98 | } 99 | 100 | #[async_trait] 101 | impl PollableAsyncStep for OrderByVersionStep { 102 | fn poll_interval(&self) -> Duration { 103 | self.poll_interval 104 | } 105 | 106 | async fn poll(&mut self) -> Result>>, ProcessorError> { 107 | Ok(Some(std::mem::take(&mut self.ordered_versions))) 108 | } 109 | } 110 | 111 | impl NamedStep for OrderByVersionStep { 112 | // TODO: oncecell this somehow? Likely in wrapper struct... 113 | fn name(&self) -> String { 114 | format!("OrderByVersionStep: {}", std::any::type_name::()) 115 | } 116 | } 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | use super::*; 121 | use crate::{ 122 | builder::ProcessorBuilder, 123 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 124 | traits::{IntoRunnableStep, RunnableStepWithInputReceiver}, 125 | types::transaction_context::TransactionMetadata, 126 | }; 127 | use instrumented_channel::instrumented_bounded_channel; 128 | 129 | fn generate_unordered_transaction_contexts() -> Vec> { 130 | vec![ 131 | TransactionContext { 132 | data: (), 133 | metadata: TransactionMetadata { 134 | start_version: 100, 135 | end_version: 199, 136 | start_transaction_timestamp: None, 137 | end_transaction_timestamp: None, 138 | total_size_in_bytes: 0, 139 | }, 140 | }, 141 | TransactionContext { 142 | data: (), 143 | metadata: TransactionMetadata { 144 | start_version: 0, 145 | end_version: 99, 146 | start_transaction_timestamp: None, 147 | end_transaction_timestamp: None, 148 | total_size_in_bytes: 0, 149 | }, 150 | }, 151 | ] 152 | } 153 | 154 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 155 | #[allow(clippy::needless_return)] 156 | async fn test_order_step() { 157 | // Setup 158 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 159 | let input_step = RunnableStepWithInputReceiver::new( 160 | input_receiver, 161 | PassThroughStep::default().into_runnable_step(), 162 | ); 163 | let order_step = OrderByVersionStep::<()>::new(0, Duration::from_millis(250)); 164 | 165 | let (_pb, mut output_receiver) = 166 | ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 167 | .connect_to(order_step.into_runnable_step(), 5) 168 | .end_and_return_output_receiver(5); 169 | 170 | let unordered_transaction_contexts = generate_unordered_transaction_contexts(); 171 | let mut ordered_transaction_contexts = unordered_transaction_contexts.clone(); 172 | ordered_transaction_contexts.sort(); 173 | 174 
| for transaction_context in unordered_transaction_contexts { 175 | input_sender.send(transaction_context).await.unwrap(); 176 | } 177 | tokio::time::sleep(Duration::from_millis(500)).await; 178 | 179 | for ordered_transaction_context in ordered_transaction_contexts { 180 | let result = receive_with_timeout(&mut output_receiver, 100) 181 | .await 182 | .unwrap(); 183 | assert_eq!( 184 | result.metadata.start_version, 185 | ordered_transaction_context.metadata.start_version 186 | ); 187 | } 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod builder; 2 | pub mod common_steps; // TODO: Feature gate this? 3 | #[cfg(feature = "server_framework")] 4 | pub mod health; 5 | #[cfg(feature = "postgres_partial")] 6 | pub mod postgres; 7 | #[cfg(feature = "server_framework")] 8 | pub mod server_framework; 9 | pub mod test; 10 | #[cfg(feature = "testing_framework")] 11 | pub mod testing_framework; 12 | pub mod traits; 13 | pub mod types; 14 | pub mod utils; 15 | 16 | // Re-exporting crates to provide a cohesive SDK interface 17 | pub use aptos_indexer_transaction_stream; 18 | pub use aptos_protos; 19 | pub use aptos_transaction_filter; 20 | pub use bcs; 21 | pub use instrumented_channel; 22 | 23 | #[cfg(test)] 24 | mod tests { 25 | use crate::{ 26 | builder::ProcessorBuilder, 27 | common_steps::TimedBufferStep, 28 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 29 | traits::{ 30 | AsyncStep, IntoRunnableStep, NamedStep, Processable, RunnableAsyncStep, 31 | RunnableStepWithInputReceiver, 32 | }, 33 | types::transaction_context::{TransactionContext, TransactionMetadata}, 34 | utils::errors::ProcessorError, 35 | }; 36 | use anyhow::Result; 37 | use async_trait::async_trait; 38 | use instrumented_channel::instrumented_bounded_channel; 39 | use std::time::Duration; 40 | 41 | #[derive(Clone, Debug, PartialEq)] 42 | pub struct TestStruct { 43 | pub i: usize, 44 | } 45 | 46 | fn make_test_structs(num: usize) -> Vec { 47 | (1..(num + 1)).map(|i| TestStruct { i }).collect() 48 | } 49 | 50 | pub struct TestStep; 51 | 52 | impl AsyncStep for TestStep {} 53 | 54 | impl NamedStep for TestStep { 55 | fn name(&self) -> String { 56 | "TestStep".to_string() 57 | } 58 | } 59 | 60 | #[async_trait] 61 | impl Processable for TestStep { 62 | type Input = Vec; 63 | type Output = Vec; 64 | type RunType = (); 65 | 66 | async fn process( 67 | &mut self, 68 | item: TransactionContext>, 69 | ) -> Result>>, ProcessorError> { 70 | let processed = item.data.into_iter().map(|i| TestStruct { i }).collect(); 71 | Ok(Some(TransactionContext { 72 | data: processed, 73 | metadata: item.metadata, 74 | })) 75 | } 76 | } 77 | 78 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 79 | #[allow(clippy::needless_return)] 80 | async fn test_connect_two_steps() { 81 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 82 | 83 | let input_step = RunnableStepWithInputReceiver::new( 84 | input_receiver, 85 | RunnableAsyncStep::new(PassThroughStep::default()), 86 | ); 87 | 88 | // Create a timed buffer that outputs the input after 1 second 89 | let timed_buffer_step = TimedBufferStep::>::new(Duration::from_millis(200)); 90 | let first_step = timed_buffer_step; 91 | 92 | let second_step = TestStep; 93 | let second_step = RunnableAsyncStep::new(second_step); 94 | 95 | let builder = 
ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 96 | .connect_to(first_step.into_runnable_step(), 5) 97 | .connect_to(second_step, 3); 98 | 99 | let mut fanout_builder = builder.fanout_broadcast(2); 100 | let (_, first_output_receiver) = fanout_builder 101 | .get_processor_builder() 102 | .unwrap() 103 | .connect_to(RunnableAsyncStep::new(PassThroughStep::default()), 1) 104 | .end_and_return_output_receiver(1); 105 | 106 | let (second_builder, second_output_receiver) = fanout_builder 107 | .get_processor_builder() 108 | .unwrap() 109 | .connect_to( 110 | RunnableAsyncStep::new(PassThroughStep::new_named("MaxStep".to_string())), 111 | 2, 112 | ) 113 | .connect_to(RunnableAsyncStep::new(PassThroughStep::default()), 5) 114 | .end_and_return_output_receiver(5); 115 | 116 | let mut output_receivers = [first_output_receiver, second_output_receiver]; 117 | 118 | output_receivers.iter().for_each(|output_receiver| { 119 | assert_eq!(output_receiver.len(), 0, "Output should be empty"); 120 | }); 121 | 122 | let left_input = TransactionContext { 123 | data: vec![1, 2, 3], 124 | metadata: TransactionMetadata { 125 | start_version: 0, 126 | end_version: 1, 127 | start_transaction_timestamp: None, 128 | end_transaction_timestamp: None, 129 | total_size_in_bytes: 0, 130 | }, 131 | }; 132 | input_sender.send(left_input.clone()).await.unwrap(); 133 | tokio::time::sleep(Duration::from_millis(250)).await; 134 | 135 | output_receivers.iter().for_each(|output_receiver| { 136 | assert_eq!(output_receiver.len(), 1, "Output should have 1 item"); 137 | }); 138 | 139 | for output_receiver in output_receivers.iter_mut() { 140 | let result = receive_with_timeout(output_receiver, 100).await.unwrap(); 141 | 142 | assert_eq!( 143 | result.data, 144 | make_test_structs(3), 145 | "Output should be the same as input" 146 | ); 147 | } 148 | 149 | let graph = second_builder.graph; 150 | let dot = graph.dot(); 151 | println!("{dot:}"); 152 | //first_handle.abort(); 153 | //second_handle.abort(); 154 | } 155 | 156 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 157 | #[allow(clippy::needless_return)] 158 | async fn test_fanin() { 159 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 160 | 161 | let input_step = RunnableStepWithInputReceiver::new( 162 | input_receiver, 163 | RunnableAsyncStep::new(PassThroughStep::default()), 164 | ); 165 | 166 | let mut fanout_builder = 167 | ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 168 | .fanout_broadcast(2); 169 | 170 | let (first_builder, first_output_receiver) = fanout_builder 171 | .get_processor_builder() 172 | .unwrap() 173 | .connect_to( 174 | RunnableAsyncStep::new(PassThroughStep::new_named("FanoutStep1".to_string())), 175 | 5, 176 | ) 177 | .end_and_return_output_receiver(5); 178 | 179 | let (second_builder, second_output_receiver) = fanout_builder 180 | .get_processor_builder() 181 | .unwrap() 182 | .connect_to( 183 | RunnableAsyncStep::new(PassThroughStep::new_named("FanoutStep2".to_string())), 184 | 5, 185 | ) 186 | .end_and_return_output_receiver(5); 187 | 188 | let test_step = TestStep; 189 | let test_step = RunnableAsyncStep::new(test_step); 190 | 191 | let (_, mut fanin_output_receiver) = ProcessorBuilder::new_with_fanin_step_with_receivers( 192 | vec![ 193 | (first_output_receiver, first_builder.graph), 194 | (second_output_receiver, second_builder.graph), 195 | ], 196 | RunnableAsyncStep::new(PassThroughStep::new_named("FaninStep".to_string())), 197 | 3, 198 | ) 199 | 
.connect_to(test_step, 10) 200 | .end_and_return_output_receiver(6); 201 | 202 | assert_eq!(fanin_output_receiver.len(), 0, "Output should be empty"); 203 | 204 | let left_input = TransactionContext { 205 | data: vec![1, 2, 3], 206 | metadata: TransactionMetadata { 207 | start_version: 0, 208 | end_version: 1, 209 | start_transaction_timestamp: None, 210 | end_transaction_timestamp: None, 211 | total_size_in_bytes: 0, 212 | }, 213 | }; 214 | input_sender.send(left_input.clone()).await.unwrap(); 215 | tokio::time::sleep(Duration::from_millis(250)).await; 216 | 217 | assert_eq!(fanin_output_receiver.len(), 2, "Output should have 2 items"); 218 | 219 | for _ in 0..2 { 220 | let result = receive_with_timeout(&mut fanin_output_receiver, 100) 221 | .await 222 | .unwrap(); 223 | 224 | assert_eq!( 225 | result.data, 226 | make_test_structs(3), 227 | "Output should be the same as input" 228 | ); 229 | } 230 | 231 | let graph = fanout_builder.graph; 232 | let dot = graph.dot(); 233 | println!("{dot:}"); 234 | //first_handle.abort(); 235 | //second_handle.abort(); 236 | } 237 | } 238 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/transaction_stream_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{NamedStep, PollableAsyncRunType, PollableAsyncStep, Processable}, 3 | types::transaction_context::{TransactionContext, TransactionMetadata}, 4 | utils::errors::ProcessorError, 5 | }; 6 | use anyhow::Result; 7 | use aptos_indexer_transaction_stream::{ 8 | TransactionStream as TransactionStreamInternal, TransactionStreamConfig, 9 | }; 10 | use aptos_protos::transaction::v1::Transaction; 11 | use async_trait::async_trait; 12 | use mockall::mock; 13 | use std::time::Duration; 14 | use tokio::sync::Mutex; 15 | use tracing::{error, info, warn}; 16 | 17 | // TransactionStreamStep establishes a gRPC connection with Transaction Stream, 18 | // fetches transactions, and outputs them for processing. It also handles reconnections with retries. 19 | // This is usually the initial step in a processor.
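// A construction sketch (hedged; `transaction_stream_config` is assumed to be a populated
// TransactionStreamConfig and `next_step` stands in for any downstream step), mirroring how
// run_processor in basic_processor_function.rs wires this step in:
//
//     let stream_step = TransactionStreamStep::new(TransactionStreamConfig {
//         starting_version: Some(starting_version),
//         ..transaction_stream_config
//     })
//     .await?;
//     let (_, receiver) = ProcessorBuilder::new_with_inputless_first_step(
//         stream_step.into_runnable_step(),
//     )
//     .connect_to(next_step.into_runnable_step(), 10)
//     .end_and_return_output_receiver(10);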
20 | pub struct TransactionStreamStep 21 | where 22 | Self: Sized + Send + 'static, 23 | { 24 | transaction_stream_config: TransactionStreamConfig, 25 | pub transaction_stream: Mutex, 26 | } 27 | 28 | impl TransactionStreamStep 29 | where 30 | Self: Sized + Send + 'static, 31 | { 32 | pub async fn new( 33 | transaction_stream_config: TransactionStreamConfig, 34 | ) -> Result { 35 | let transaction_stream_res = 36 | TransactionStreamInternal::new(transaction_stream_config.clone()).await; 37 | match transaction_stream_res { 38 | Err(e) => Err(ProcessorError::StepInitError { 39 | message: format!("Error creating transaction stream: {e:?}"), 40 | }), 41 | Ok(transaction_stream) => Ok(Self { 42 | transaction_stream: Mutex::new(transaction_stream), 43 | transaction_stream_config, 44 | }), 45 | } 46 | } 47 | } 48 | 49 | #[async_trait] 50 | impl Processable for TransactionStreamStep 51 | where 52 | Self: Sized + Send + 'static, 53 | { 54 | type Input = (); 55 | // The TransactionStreamStep will output a batch of transactions for processing 56 | type Output = Vec; 57 | type RunType = PollableAsyncRunType; 58 | 59 | async fn process( 60 | &mut self, 61 | _item: TransactionContext<()>, 62 | ) -> Result>>, ProcessorError> { 63 | Ok(None) 64 | } 65 | } 66 | 67 | #[async_trait] 68 | impl PollableAsyncStep for TransactionStreamStep 69 | where 70 | Self: Sized + Send + Sync + 'static, 71 | { 72 | fn poll_interval(&self) -> std::time::Duration { 73 | Duration::from_secs(0) 74 | } 75 | 76 | async fn poll( 77 | &mut self, 78 | ) -> Result>>>, ProcessorError> { 79 | let txn_pb_response_res = self 80 | .transaction_stream 81 | .lock() 82 | .await 83 | .get_next_transaction_batch() 84 | .await; 85 | match txn_pb_response_res { 86 | Ok(txn_pb_response) => { 87 | let transactions_with_context = TransactionContext { 88 | data: txn_pb_response.transactions, 89 | metadata: TransactionMetadata { 90 | start_version: txn_pb_response.start_version, 91 | end_version: txn_pb_response.end_version, 92 | start_transaction_timestamp: txn_pb_response.start_txn_timestamp, 93 | end_transaction_timestamp: txn_pb_response.end_txn_timestamp, 94 | total_size_in_bytes: txn_pb_response.size_in_bytes, 95 | }, 96 | }; 97 | Ok(Some(vec![transactions_with_context])) 98 | }, 99 | Err(e) => { 100 | warn!( 101 | stream_address = self.transaction_stream_config.indexer_grpc_data_service_address.to_string(), 102 | error = ?e, 103 | "Error fetching transactions from TransactionStream. Attempting to reconnect." 104 | ); 105 | 106 | // TransactionStream closes connections every 5 minutes. We should try to reconnect 107 | match self 108 | .transaction_stream 109 | .lock() 110 | .await 111 | .reconnect_to_grpc_with_retries() 112 | .await 113 | { 114 | Ok(_) => { 115 | info!( 116 | stream_address = self 117 | .transaction_stream_config 118 | .indexer_grpc_data_service_address 119 | .to_string(), 120 | "Successfully reconnected to TransactionStream." 121 | ); 122 | // Return nothing for now. The next poll will fetch the next batch of transactions. 123 | Ok(None) 124 | }, 125 | Err(e) => { 126 | error!( 127 | stream_address = self.transaction_stream_config 128 | .indexer_grpc_data_service_address 129 | .to_string(), 130 | error = ?e, 131 | " Error reconnecting transaction stream." 
132 | ); 133 | Err(ProcessorError::PollError { 134 | message: format!("Error reconnecting to TransactionStream: {e:?}"), 135 | }) 136 | }, 137 | } 138 | }, 139 | } 140 | } 141 | 142 | async fn should_continue_polling(&mut self) -> bool { 143 | let is_end = self.transaction_stream.lock().await.is_end_of_stream(); 144 | if is_end { 145 | info!("Reached ending version"); 146 | } 147 | !is_end 148 | } 149 | } 150 | 151 | impl NamedStep for TransactionStreamStep { 152 | fn name(&self) -> String { 153 | "TransactionStreamStep".to_string() 154 | } 155 | } 156 | 157 | mock! { 158 | pub TransactionStreamStep {} 159 | 160 | #[async_trait] 161 | impl Processable for TransactionStreamStep 162 | where Self: Sized + Send + 'static, 163 | { 164 | type Input = (); 165 | type Output = Vec; 166 | type RunType = PollableAsyncRunType; 167 | 168 | async fn init(&mut self); 169 | 170 | async fn process(&mut self, _item: TransactionContext<()> ) -> Result>>, ProcessorError>; 171 | } 172 | 173 | #[async_trait] 174 | impl PollableAsyncStep for TransactionStreamStep 175 | where 176 | Self: Sized + Send + 'static, 177 | { 178 | fn poll_interval(&self) -> std::time::Duration; 179 | 180 | // async fn poll(&mut self) -> Option> { 181 | // // Testing framework can provide mocked transactions here 182 | // Some(vec![TransactionsPBResponse { 183 | // transactions: vec![], 184 | // chain_id: 0, 185 | // start_version: 0, 186 | // end_version: 100, 187 | // start_txn_timestamp: None, 188 | // end_txn_timestamp: None, 189 | // size_in_bytes: 10, 190 | // }]) 191 | // } 192 | async fn poll(&mut self) -> Result>>>, ProcessorError>; 193 | 194 | async fn should_continue_polling(&mut self) -> bool; 195 | } 196 | 197 | impl NamedStep for TransactionStreamStep { 198 | fn name(&self) -> String; 199 | } 200 | } 201 | 202 | #[cfg(test)] 203 | mod tests { 204 | use super::*; 205 | use crate::{ 206 | builder::ProcessorBuilder, 207 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 208 | traits::IntoRunnableStep, 209 | types::transaction_context::TransactionMetadata, 210 | }; 211 | use mockall::Sequence; 212 | use std::time::Duration; 213 | 214 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 215 | #[allow(clippy::needless_return)] 216 | async fn test_transaction_stream() { 217 | let mut mock_transaction_stream = MockTransactionStreamStep::new(); 218 | // Testing framework can provide mocked transactions here 219 | mock_transaction_stream.expect_poll().returning(|| { 220 | Ok(Some(vec![TransactionContext { 221 | data: vec![Transaction::default()], 222 | metadata: TransactionMetadata { 223 | start_version: 0, 224 | end_version: 100, 225 | start_transaction_timestamp: None, 226 | end_transaction_timestamp: None, 227 | total_size_in_bytes: 10, 228 | }, 229 | }])) 230 | }); 231 | mock_transaction_stream 232 | .expect_poll_interval() 233 | .returning(|| Duration::from_secs(0)); 234 | mock_transaction_stream.expect_init().returning(|| { 235 | // Do nothing 236 | }); 237 | mock_transaction_stream 238 | .expect_name() 239 | .returning(|| "MockTransactionStream".to_string()); 240 | 241 | // Set up the mock transaction stream to poll 3 times 242 | let mut seq = Sequence::new(); 243 | mock_transaction_stream 244 | .expect_should_continue_polling() 245 | .times(3) 246 | .in_sequence(&mut seq) 247 | .return_const(true); 248 | mock_transaction_stream 249 | .expect_should_continue_polling() 250 | .return_const(false); 251 | 252 | let pass_through_step = PassThroughStep::default(); 253 | 254 | let (_, mut 
output_receiver) = ProcessorBuilder::new_with_inputless_first_step( 255 | mock_transaction_stream.into_runnable_step(), 256 | ) 257 | .connect_to(pass_through_step.into_runnable_step(), 5) 258 | .end_and_return_output_receiver(5); 259 | 260 | tokio::time::sleep(Duration::from_millis(250)).await; 261 | for _ in 0..3 { 262 | let result = receive_with_timeout(&mut output_receiver, 100) 263 | .await 264 | .unwrap(); 265 | 266 | assert_eq!(result.data.len(), 1); 267 | } 268 | 269 | // After receiving 3 outputs, the channel should be empty 270 | let result = receive_with_timeout(&mut output_receiver, 100).await; 271 | assert!(result.is_none()); 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/database.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Database-related functions 5 | #![allow(clippy::extra_unused_lifetimes)] 6 | 7 | use crate::utils::{convert::remove_null_bytes, errors::ProcessorError}; 8 | use ahash::AHashMap; 9 | use diesel::{query_builder::QueryFragment, ConnectionResult, QueryResult}; 10 | use diesel_async::{ 11 | pooled_connection::{ 12 | bb8::{Pool, PooledConnection}, 13 | AsyncDieselConnectionManager, ManagerConfig, PoolError, 14 | }, 15 | AsyncPgConnection, RunQueryDsl, 16 | }; 17 | use diesel_migrations::{EmbeddedMigrations, MigrationHarness}; 18 | use futures_util::{future::BoxFuture, FutureExt}; 19 | use std::sync::Arc; 20 | use tracing::{info, warn}; 21 | 22 | pub type Backend = diesel::pg::Pg; 23 | 24 | pub type MyDbConnection = AsyncPgConnection; 25 | pub type DbPool = Pool<MyDbConnection>; 26 | pub type ArcDbPool = Arc<DbPool>; 27 | pub type DbPoolConnection<'a> = PooledConnection<'a, MyDbConnection>; 28 | 29 | pub const DEFAULT_MAX_POOL_SIZE: u32 = 150; 30 | 31 | // The hard limit is u16::MAX bind parameters, but sizes near that limit overflow in practice, so we halve it to be safe. 32 | pub const MAX_DIESEL_PARAM_SIZE: usize = (u16::MAX / 2) as usize; 33 | 34 | /// This function cleans data before it is written to Postgres. Currently it only removes 35 | /// null bytes from strings; more cleaning may be added in the future.
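/// For example, a string field containing "abc\0def" is expected to come back as "abcdef"
/// after the serde round-trip, since Postgres rejects NUL bytes in TEXT values (a hedged
/// illustration; the actual stripping lives in utils::convert::remove_null_bytes).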
36 | pub fn clean_data_for_db serde::Deserialize<'de>>( 37 | items: Vec, 38 | should_remove_null_bytes: bool, 39 | ) -> Vec { 40 | if should_remove_null_bytes { 41 | items.iter().map(remove_null_bytes).collect() 42 | } else { 43 | items 44 | } 45 | } 46 | 47 | fn establish_connection(database_url: &str) -> BoxFuture<'_, ConnectionResult> { 48 | use native_tls::{Certificate, TlsConnector}; 49 | use postgres_native_tls::MakeTlsConnector; 50 | 51 | (async move { 52 | let (url, cert_path) = parse_and_clean_db_url(database_url); 53 | let cert = std::fs::read(cert_path.unwrap()).expect("Could not read certificate"); 54 | 55 | let cert = Certificate::from_pem(&cert).expect("Could not parse certificate"); 56 | let connector = TlsConnector::builder() 57 | .danger_accept_invalid_certs(true) 58 | .add_root_certificate(cert) 59 | .build() 60 | .expect("Could not build TLS connector"); 61 | let connector = MakeTlsConnector::new(connector); 62 | 63 | let (client, connection) = tokio_postgres::connect(&url, connector) 64 | .await 65 | .expect("Could not connect to database"); 66 | tokio::spawn(async move { 67 | if let Err(e) = connection.await { 68 | eprintln!("connection error: {e}"); 69 | } 70 | }); 71 | AsyncPgConnection::try_from(client).await 72 | }) 73 | .boxed() 74 | } 75 | 76 | fn parse_and_clean_db_url(url: &str) -> (String, Option) { 77 | let mut db_url = url::Url::parse(url).expect("Could not parse database url"); 78 | let mut cert_path = None; 79 | 80 | let mut query = "".to_string(); 81 | db_url.query_pairs().for_each(|(k, v)| { 82 | if k == "sslrootcert" { 83 | cert_path = Some(v.parse().unwrap()); 84 | } else { 85 | query.push_str(&format!("{k}={v}&")); 86 | } 87 | }); 88 | db_url.set_query(Some(&query)); 89 | 90 | (db_url.to_string(), cert_path) 91 | } 92 | 93 | pub async fn new_db_pool( 94 | database_url: &str, 95 | max_pool_size: Option, 96 | ) -> Result { 97 | let (_url, cert_path) = parse_and_clean_db_url(database_url); 98 | 99 | let config = if cert_path.is_some() { 100 | let mut config = ManagerConfig::::default(); 101 | config.custom_setup = Box::new(|conn| Box::pin(establish_connection(conn))); 102 | AsyncDieselConnectionManager::::new_with_config(database_url, config) 103 | } else { 104 | AsyncDieselConnectionManager::::new(database_url) 105 | }; 106 | let pool = Pool::builder() 107 | .max_size(max_pool_size.unwrap_or(DEFAULT_MAX_POOL_SIZE)) 108 | .build(config) 109 | .await?; 110 | Ok(Arc::new(pool)) 111 | } 112 | 113 | pub async fn execute_in_chunks( 114 | conn: ArcDbPool, 115 | build_query: fn(Vec) -> U, 116 | items_to_insert: &[T], 117 | chunk_size: usize, 118 | ) -> Result<(), ProcessorError> 119 | where 120 | U: QueryFragment + diesel::query_builder::QueryId + Send + 'static, 121 | T: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone + Send + 'static, 122 | { 123 | let tasks = items_to_insert 124 | .chunks(chunk_size) 125 | .map(|chunk| { 126 | let conn = conn.clone(); 127 | let items = chunk.to_vec(); 128 | tokio::spawn(async move { 129 | let query = build_query(items.clone()); 130 | execute_or_retry_cleaned(conn, build_query, items, query).await 131 | }) 132 | }) 133 | .collect::>(); 134 | 135 | let results = futures_util::future::try_join_all(tasks) 136 | .await 137 | .expect("Task panicked executing in chunks"); 138 | for res in results { 139 | res? 140 | } 141 | 142 | Ok(()) 143 | } 144 | 145 | /// Returns the entry for the config hashmap, or the default field count for the insert. 
146 | /// 147 | /// Given diesel has a limit of how many parameters can be inserted in a single operation (u16::MAX), 148 | /// we default to chunk an array of items based on how many columns are in the table. 149 | pub fn get_config_table_chunk_size( 150 | table_name: &str, 151 | per_table_chunk_sizes: &AHashMap, 152 | ) -> usize { 153 | let chunk_size = per_table_chunk_sizes.get(table_name).copied(); 154 | chunk_size.unwrap_or_else(|| MAX_DIESEL_PARAM_SIZE / T::field_count()) 155 | } 156 | 157 | pub async fn execute_with_better_error( 158 | pool: ArcDbPool, 159 | query: U, 160 | ) -> Result 161 | where 162 | U: QueryFragment + diesel::query_builder::QueryId + Send, 163 | { 164 | let debug_string = diesel::debug_query::(&query).to_string(); 165 | let conn = &mut pool.get().await.map_err(|e| { 166 | warn!("Error getting connection from pool: {:?}", e); 167 | ProcessorError::DBStoreError { 168 | message: format!("{e:#}"), 169 | query: Some(debug_string.clone()), 170 | } 171 | })?; 172 | query 173 | .execute(conn) 174 | .await 175 | .inspect_err(|e| { 176 | warn!("Error running query: {:?}\n{:?}", e, debug_string); 177 | }) 178 | .map_err(|e| ProcessorError::DBStoreError { 179 | message: format!("{e:#}"), 180 | query: Some(debug_string), 181 | }) 182 | } 183 | 184 | pub async fn execute_with_better_error_conn( 185 | conn: &mut MyDbConnection, 186 | query: U, 187 | ) -> QueryResult 188 | where 189 | U: QueryFragment + diesel::query_builder::QueryId + Send, 190 | { 191 | let debug_string = diesel::debug_query::(&query).to_string(); 192 | tracing::debug!("Executing query: {:?}", debug_string); 193 | let res = query.execute(conn).await; 194 | if let Err(ref e) = res { 195 | tracing::warn!("Error running query: {:?}\n{:?}", e, debug_string); 196 | } 197 | res 198 | } 199 | 200 | async fn execute_or_retry_cleaned( 201 | conn: ArcDbPool, 202 | build_query: fn(Vec) -> U, 203 | items: Vec, 204 | query: U, 205 | ) -> Result<(), ProcessorError> 206 | where 207 | U: QueryFragment + diesel::query_builder::QueryId + Send, 208 | T: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone, 209 | { 210 | match execute_with_better_error(conn.clone(), query).await { 211 | Ok(_) => {}, 212 | Err(_) => { 213 | let cleaned_items = clean_data_for_db(items, true); 214 | let cleaned_query = build_query(cleaned_items); 215 | match execute_with_better_error(conn.clone(), cleaned_query).await { 216 | Ok(_) => {}, 217 | Err(e) => { 218 | return Err(e); 219 | }, 220 | } 221 | }, 222 | } 223 | Ok(()) 224 | } 225 | 226 | pub fn run_pending_migrations( 227 | conn: &mut impl MigrationHarness, 228 | migrations: EmbeddedMigrations, 229 | ) { 230 | conn.run_pending_migrations(migrations) 231 | .expect("[Parser] Migrations failed!"); 232 | } 233 | 234 | // For the normal processor build we just use standard Diesel with the postgres 235 | // feature enabled (which uses libpq under the hood, hence why we named the feature 236 | // this way). 
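// A hedged Cargo.toml sketch for a downstream processor that wants this libpq-backed
// migration path (the dependency name and source are placeholders/assumptions; only the
// feature name is taken from the cfg below):
//
//     [dependencies]
//     aptos-indexer-processor-sdk = { git = "<sdk repo url>", features = ["postgres_full"] }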
237 | #[cfg(feature = "postgres_full")] 238 | pub async fn run_migrations( 239 | postgres_connection_string: String, 240 | _conn_pool: ArcDbPool, 241 | migrations: EmbeddedMigrations, 242 | ) { 243 | use diesel::{Connection, PgConnection}; 244 | 245 | info!("Running migrations: {:?}", postgres_connection_string); 246 | let migration_time = std::time::Instant::now(); 247 | let mut conn = 248 | PgConnection::establish(&postgres_connection_string).expect("migrations failed!"); 249 | run_pending_migrations(&mut conn, migrations); 250 | info!( 251 | duration_in_secs = migration_time.elapsed().as_secs_f64(), 252 | "[Parser] Finished migrations" 253 | ); 254 | } 255 | 256 | // If the postgres_full feature isn't enabled, we use diesel async instead. This is used by 257 | // the CLI for the local testnet, where we cannot tolerate the libpq dependency. 258 | #[cfg(not(feature = "postgres_full"))] 259 | pub async fn run_migrations( 260 | postgres_connection_string: String, 261 | conn_pool: ArcDbPool, 262 | migrations: EmbeddedMigrations, 263 | ) { 264 | use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; 265 | 266 | info!("Running migrations: {:?}", postgres_connection_string); 267 | let conn = conn_pool 268 | // We need to use this since AsyncConnectionWrapper doesn't know how to 269 | // work with a pooled connection. 270 | .dedicated_connection() 271 | .await 272 | .expect("[Parser] Failed to get connection"); 273 | // We use spawn_blocking since run_pending_migrations is a blocking function. 274 | tokio::task::spawn_blocking(move || { 275 | // This lets us use the connection like a normal diesel connection. See more: 276 | // https://docs.rs/diesel-async/latest/diesel_async/async_connection_wrapper/type.AsyncConnectionWrapper.html 277 | let mut conn: AsyncConnectionWrapper = 278 | AsyncConnectionWrapper::from(conn); 279 | run_pending_migrations(&mut conn, migrations); 280 | }) 281 | .await 282 | .expect("[Parser] Failed to run migrations"); 283 | } 284 | 285 | pub struct DbContext<'a> { 286 | pub conn: DbPoolConnection<'a>, 287 | pub query_retries: u32, 288 | pub query_retry_delay_ms: u64, 289 | } 290 | --------------------------------------------------------------------------------
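For orientation, here is a minimal, hedged sketch of a downstream processor driving the `process` entry point from basic_processor_function.rs. The crate name, import paths, and migrations directory are assumptions; the shipped postgres-basic-events-example shows the canonical wiring.

use anyhow::Result;
use aptos_indexer_processor_sdk::{
    aptos_protos::transaction::v1::Transaction,
    postgres::{basic_processor::process, utils::database::ArcDbPool},
    utils::errors::ProcessorError,
};
use diesel_migrations::{embed_migrations, EmbeddedMigrations};

// Hypothetical path: migrations shipped with the downstream processor crate.
pub const MIGRATIONS: EmbeddedMigrations = embed_migrations!("src/db/migrations");

#[tokio::main]
async fn main() -> Result<()> {
    process(
        "events_processor".to_string(),
        MIGRATIONS,
        |transactions: Vec<Transaction>, _pool: ArcDbPool| async move {
            // A real processor would parse events here and write models via the pool.
            for txn in transactions {
                tracing::info!(version = txn.version, "received transaction");
            }
            Ok::<(), ProcessorError>(())
        },
    )
    .await
}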