├── .github ├── CODEOWNERS └── workflows │ ├── lint.yaml │ ├── release.yaml │ ├── tests.yaml │ ├── update-processor-sdk-version-legacy.yaml │ ├── update-processor-sdk-version.yaml │ └── update-proto-dependency.yaml ├── .gitignore ├── README.md ├── aptos-indexer-processors-sdk ├── .cargo │ └── config.toml ├── Cargo.lock ├── Cargo.toml ├── instrumented-channel │ ├── Cargo.lock │ ├── Cargo.toml │ └── src │ │ ├── channel_metrics.rs │ │ └── lib.rs ├── moving-average │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── rustfmt.toml ├── sample │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── sdk │ ├── Cargo.toml │ └── src │ │ ├── builder │ │ ├── dag.rs │ │ ├── mod.rs │ │ └── processor_builder.rs │ │ ├── common_steps │ │ ├── arcify_step.rs │ │ ├── mod.rs │ │ ├── order_by_version_step.rs │ │ ├── timed_buffer_step.rs │ │ ├── transaction_stream_step.rs │ │ ├── version_tracker_step.rs │ │ └── write_rate_limit_step.rs │ │ ├── lib.rs │ │ ├── postgres │ │ ├── README.md │ │ ├── basic_processor │ │ │ ├── README.md │ │ │ ├── basic_processor_function.rs │ │ │ ├── basic_processor_step.rs │ │ │ └── mod.rs │ │ ├── db │ │ │ ├── diesel.toml │ │ │ ├── migrations │ │ │ │ ├── 00000000000000_diesel_initial_setup │ │ │ │ │ ├── down.sql │ │ │ │ │ └── up.sql │ │ │ │ └── 2025-03-06-201942_create_core_schema │ │ │ │ │ ├── down.sql │ │ │ │ │ └── up.sql │ │ │ └── processor_metadata_schema.rs │ │ ├── mod.rs │ │ ├── models │ │ │ ├── ledger_info.rs │ │ │ ├── mod.rs │ │ │ └── processor_status.rs │ │ ├── subconfigs │ │ │ ├── mod.rs │ │ │ └── postgres_config.rs │ │ └── utils │ │ │ ├── checkpoint.rs │ │ │ ├── database.rs │ │ │ └── mod.rs │ │ ├── server_framework.rs │ │ ├── test │ │ ├── mod.rs │ │ ├── steps │ │ │ ├── mod.rs │ │ │ └── pass_through_step.rs │ │ └── utils.rs │ │ ├── testing_framework │ │ ├── cli_parser.rs │ │ ├── database.rs │ │ ├── mock_grpc.rs │ │ ├── mod.rs │ │ └── sdk_test_context.rs │ │ ├── traits │ │ ├── README.md │ │ ├── async_step.rs │ │ ├── instrumentation.rs │ │ ├── into_runnable_step.rs │ │ ├── mod.rs │ │ ├── pollable_async_step.rs │ │ ├── processable.rs │ │ ├── processor_trait.rs │ │ └── runnable_step.rs │ │ ├── types │ │ ├── mod.rs │ │ └── transaction_context.rs │ │ └── utils │ │ ├── README.md │ │ ├── chain_id_check.rs │ │ ├── constants.rs │ │ ├── convert.rs │ │ ├── errors.rs │ │ ├── extract.rs │ │ ├── mod.rs │ │ ├── property_map.rs │ │ └── step_metrics.rs └── transaction-stream │ ├── Cargo.toml │ └── src │ ├── config.rs │ ├── lib.rs │ ├── transaction_stream.rs │ └── utils │ ├── additional_headers.rs │ ├── mod.rs │ └── time.rs ├── examples ├── .cargo │ └── config.toml ├── Cargo.lock ├── Cargo.toml ├── postgres-basic-events-example │ ├── Cargo.toml │ ├── README.md │ ├── example-config.yaml │ └── src │ │ ├── db │ │ ├── diesel.toml │ │ ├── migrations │ │ │ ├── 00000000000000_diesel_initial_setup │ │ │ │ ├── down.sql │ │ │ │ └── up.sql │ │ │ └── 2025-03-06-231718_create_events │ │ │ │ ├── down.sql │ │ │ │ └── up.sql │ │ └── schema.rs │ │ ├── events_model.rs │ │ └── main.rs └── rustfmt.toml └── scripts ├── check_banned_deps.sh └── rust_lint.sh /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @aptos-labs/ecosystem-infra @rtso -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: "Lint" 2 | on: 3 | # Allow us to run this specific workflow without a PR 4 | workflow_dispatch: 5 | pull_request: 6 | push: 7 | branches: 8 | 
- main 9 | 10 | # cancel redundant builds 11 | concurrency: 12 | # for push and workflow_dispatch events we use `github.sha` in the concurrency group and don't really cancel each other out/limit concurrency 13 | # for pull_request events newer jobs cancel earlier jobs to save on CI etc. 14 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.sha || github.head_ref || github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | Lint: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - name: Install Dependencies (aptos-indexer-processors-sdkl) 24 | run: | 25 | sudo apt update && sudo apt install libdw-dev 26 | cargo install cargo-sort 27 | rustup update 28 | rustup toolchain install nightly 29 | rustup component add clippy --toolchain nightly 30 | rustup component add rustfmt --toolchain nightly 31 | working-directory: aptos-indexer-processors-sdk 32 | 33 | - name: Install Dependencies (examples) 34 | run: | 35 | sudo apt update && sudo apt install libdw-dev 36 | cargo install cargo-sort 37 | rustup update 38 | rustup toolchain install nightly 39 | rustup component add clippy --toolchain nightly 40 | rustup component add rustfmt --toolchain nightly 41 | working-directory: examples 42 | 43 | - name: Run Linter 44 | run: | 45 | bash scripts/rust_lint.sh --check 46 | 47 | - name: Check Banned Dependencies 48 | run: bash scripts/check_banned_deps.sh 49 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: "Create Release Tag" 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | release_type: 6 | description: 'Type of release (patch/minor)' 7 | required: true 8 | type: choice 9 | options: 10 | - 'release patch' 11 | - 'release minor' 12 | 13 | jobs: 14 | create-tag: 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: write 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 # Fetch all history for all tags and branches 22 | 23 | - name: Determine Next Version 24 | id: next-version 25 | run: | 26 | # Get the latest tag that matches our pattern 27 | latest_tag=$(git tag -l "aptos-indexer-sdk-v*" | sort -V | tail -n 1) 28 | 29 | if [ -z "$latest_tag" ]; then 30 | # If no existing tag, start with 1.0.0 31 | echo "next_tag=aptos-indexer-sdk-v1.0.0" >> $GITHUB_OUTPUT 32 | exit 0 33 | fi 34 | 35 | # Extract version numbers 36 | version=$(echo $latest_tag | sed 's/aptos-indexer-sdk-v//') 37 | major=$(echo $version | cut -d. -f1) 38 | minor=$(echo $version | cut -d. -f2) 39 | patch=$(echo $version | cut -d. 
-f3) 40 | 41 | if [ "${{ github.event.inputs.release_type }}" = "release patch" ]; then 42 | # Increment patch version 43 | new_version="${major}.${minor}.$((patch + 1))" 44 | else 45 | # Increment minor version, reset patch to 0 46 | new_version="${major}.$((minor + 1)).0" 47 | fi 48 | 49 | echo "next_tag=aptos-indexer-sdk-v${new_version}" >> $GITHUB_OUTPUT 50 | echo "Current version: ${latest_tag}" 51 | echo "Next version will be: aptos-indexer-sdk-v${new_version}" 52 | 53 | - name: Create and Push Tag 54 | run: | 55 | git tag ${{ steps.next-version.outputs.next_tag }} 56 | git push origin ${{ steps.next-version.outputs.next_tag }} 57 | 58 | - name: Create Release 59 | uses: softprops/action-gh-release@v1 60 | with: 61 | tag_name: ${{ steps.next-version.outputs.next_tag }} 62 | name: ${{ steps.next-version.outputs.next_tag }} 63 | generate_release_notes: true -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: "Tests" 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.sha || github.head_ref || github.ref }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | Test: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Install Dependencies 20 | run: | 21 | sudo apt update && sudo apt install libdw-dev 22 | cargo install cargo-sort 23 | rustup update 24 | rustup toolchain install nightly 25 | working-directory: aptos-indexer-processors-sdk 26 | 27 | - name: Build with No Default Features 28 | run: cargo build --no-default-features 29 | working-directory: aptos-indexer-processors-sdk 30 | 31 | - name: Run Tests 32 | id: tests 33 | continue-on-error: true # Allow workflow to continue if tests fail 34 | run: cargo test 35 | working-directory: aptos-indexer-processors-sdk 36 | 37 | - name: Notify Eco Infra Oncall about proto update failure 38 | if: | 39 | steps.tests.outcome == 'failure' && 40 | github.event_name == 'pull_request' && 41 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 42 | uses: slackapi/slack-github-action@v1.24.0 43 | with: 44 | # eco-infra-oncall channel. 45 | channel-id: 'C0468USBLQJ' 46 | slack-message: | 47 | :warning: Tests failed on PR with indexer-sdk-update label 48 | PR: ${{ github.event.pull_request.html_url }} 49 | Author: ${{ github.event.pull_request.user.login }} 50 | Title: ${{ github.event.pull_request.title }} 51 | env: 52 | SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} 53 | -------------------------------------------------------------------------------- /.github/workflows/update-processor-sdk-version-legacy.yaml: -------------------------------------------------------------------------------- 1 | name: Update Processor SDK Version 2 | 'on': 3 | pull_request: 4 | types: [labeled, opened, synchronize, reopened, auto_merge_enabled] 5 | branches: 6 | - main 7 | paths: 8 | # Be conservative and only run this workflow when the Cargo.toml file changes. 9 | # Reason: if SDK version is not updated, no change will be picked up by processors. 
10 | - aptos-indexer-processors-sdk/Cargo.toml 11 | 12 | permissions: 13 | contents: read 14 | id-token: write 15 | 16 | jobs: 17 | update-processor-sdk-version: 18 | runs-on: ubuntu-latest 19 | # Only run on each PR once an appropriate event occurs 20 | if: | 21 | ( 22 | github.event_name == 'push' || 23 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 24 | ) 25 | steps: 26 | - id: auth 27 | uses: "google-github-actions/auth@v2" 28 | with: 29 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 30 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 31 | - name: Get Secret Manager Secrets 32 | id: secrets 33 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 34 | with: 35 | secrets: |- 36 | token:aptos-ci/github-actions-repository-dispatch 37 | - name: Checkout code 38 | uses: actions/checkout@v4 39 | with: 40 | token: ${{ steps.secrets.outputs.token }} 41 | - name: Capture the commit hash 42 | id: commit_hash 43 | run: | 44 | # Echo the commit hash to the output 45 | echo "::set-output name=commit_hash::$(echo $GITHUB_SHA)" 46 | # Echo the PR branch name to the output 47 | echo "::set-output name=branch_name::${{ github.head_ref }}" 48 | - name: Setup Rust 49 | uses: actions-rust-lang/setup-rust-toolchain@v1 50 | - name: Install toml 51 | run: cargo install toml-cli 52 | - name: Capture aptos-protos commit hash 53 | id: aptos_protos_commit_hash 54 | run: | 55 | cd aptos-indexer-processors-sdk 56 | aptos_protos_commit_hash=$(toml get Cargo.toml workspace.dependencies.aptos-protos.rev) 57 | echo "::set-output name=aptos_protos_commit_hash::${aptos_protos_commit_hash}" 58 | - name: Dispatch Event to processors Repo 59 | uses: peter-evans/repository-dispatch@v3.0.0 60 | with: 61 | token: ${{ steps.secrets.outputs.token }} 62 | repository: 'aptos-labs/aptos-indexer-processors' 63 | event-type: 'sdk-dependency-update' 64 | client-payload: '{"commit_hash": "${{ github.sha }}", "branch_name": "${{ steps.commit_hash.outputs.branch_name }}", "aptos_protos_commit_hash": ${{ steps.aptos_protos_commit_hash.outputs.aptos_protos_commit_hash }}}' 65 | -------------------------------------------------------------------------------- /.github/workflows/update-processor-sdk-version.yaml: -------------------------------------------------------------------------------- 1 | name: Update Processor SDK Version 2 | 'on': 3 | pull_request: 4 | types: [labeled, opened, synchronize, reopened, auto_merge_enabled] 5 | branches: 6 | - main 7 | paths: 8 | # Be conservative and only run this workflow when the Cargo.toml file changes. 9 | # Reason: if SDK version is not updated, no change will be picked up by processors. 
10 | - aptos-indexer-processors-sdk/Cargo.toml 11 | 12 | permissions: 13 | contents: read 14 | id-token: write 15 | 16 | jobs: 17 | update-processor-sdk-version: 18 | runs-on: ubuntu-latest 19 | # Only run on each PR once an appropriate event occurs 20 | if: | 21 | ( 22 | github.event_name == 'push' || 23 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 24 | ) 25 | steps: 26 | - id: auth 27 | uses: "google-github-actions/auth@v2" 28 | with: 29 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 30 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 31 | - name: Get Secret Manager Secrets 32 | id: secrets 33 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 34 | with: 35 | secrets: |- 36 | token:aptos-ci/github-actions-repository-dispatch 37 | - name: Checkout code 38 | uses: actions/checkout@v4 39 | with: 40 | token: ${{ steps.secrets.outputs.token }} 41 | - name: Capture the commit hash 42 | id: commit_hash 43 | run: | 44 | # Echo the commit hash to the output 45 | echo "::set-output name=commit_hash::$(echo $GITHUB_SHA)" 46 | # Echo the PR branch name to the output 47 | echo "::set-output name=branch_name::${{ github.head_ref }}" 48 | - name: Setup Rust 49 | uses: actions-rust-lang/setup-rust-toolchain@v1 50 | - name: Install toml 51 | run: cargo install toml-cli 52 | - name: Capture aptos-protos commit hash 53 | id: aptos_protos_commit_hash 54 | run: | 55 | cd aptos-indexer-processors-sdk 56 | aptos_protos_commit_hash=$(toml get Cargo.toml workspace.dependencies.aptos-protos.rev) 57 | echo "::set-output name=aptos_protos_commit_hash::${aptos_protos_commit_hash}" 58 | - name: Dispatch Event to processors Repo 59 | uses: peter-evans/repository-dispatch@v3.0.0 60 | with: 61 | token: ${{ steps.secrets.outputs.token }} 62 | repository: 'aptos-labs/aptos-indexer-processors-v2' 63 | event-type: 'sdk-dependency-update' 64 | client-payload: '{"commit_hash": "${{ github.sha }}", "branch_name": "${{ steps.commit_hash.outputs.branch_name }}", "aptos_protos_commit_hash": ${{ steps.aptos_protos_commit_hash.outputs.aptos_protos_commit_hash }}}' 65 | -------------------------------------------------------------------------------- /.github/workflows/update-proto-dependency.yaml: -------------------------------------------------------------------------------- 1 | name: Update Proto Dependency 2 | 3 | on: 4 | repository_dispatch: 5 | types: [proto-dependency-update] 6 | workflow_dispatch: 7 | inputs: 8 | commit_hash: 9 | description: 'Commit hash to update proto to' 10 | required: true 11 | branch_name: 12 | description: 'Branch name (without -update-aptos-protos suffix)' 13 | required: true 14 | default: 'main' 15 | 16 | permissions: 17 | contents: write 18 | pull-requests: write 19 | id-token: write 20 | 21 | jobs: 22 | update-the-dependency: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - id: auth 26 | uses: "google-github-actions/auth@v2" 27 | with: 28 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 29 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 30 | - name: Get Secret Manager Secrets 31 | id: secrets 32 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 33 | with: 34 | secrets: |- 35 | token:aptos-ci/github-actions-repository-dispatch 36 | - name: Configure Git user 37 | run: | 38 | git config --global user.name "Aptos Bot" 39 | git config --global user.email "aptos-bot@aptoslabs.com" 40 | - name: Checkout 41 | uses: actions/checkout@v4 42 | with: 43 | token: ${{ 
steps.secrets.outputs.token }} 44 | - name: Setup Rust 45 | uses: actions-rust-lang/setup-rust-toolchain@v1 46 | 47 | - name: Install toml 48 | run: cargo install toml-cli 49 | 50 | - name: Update the dependency 51 | run: | 52 | set -e 53 | toml set Cargo.toml workspace.dependencies.aptos-protos.rev ${{ github.event.inputs.commit_hash || github.event.client_payload.commit_hash }} > Cargo.tmp && mv Cargo.tmp Cargo.toml 54 | working-directory: aptos-indexer-processors-sdk/ 55 | 56 | - name: Commit and Push Changes 57 | run: | 58 | set -e 59 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 60 | git checkout -b "$branch_name" 61 | git add Cargo.toml 62 | git commit -m "Update aptos-protos to ${{ github.event.inputs.commit_hash || github.event.client_payload.commit_hash }}" 63 | git push origin "$branch_name" --force 64 | env: 65 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 66 | working-directory: aptos-indexer-processors-sdk/ 67 | 68 | - name: Check if PR Already Exists 69 | id: check_pr 70 | run: | 71 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 72 | existing_pr=$(gh pr list --base main --head "$branch_name" --json number --jq '.[].number') 73 | if [ -n "$existing_pr" ]; then 74 | echo "::set-output name=if_pr_exists::true" 75 | else 76 | echo "::set-output name=if_pr_exists::false" 77 | fi 78 | env: 79 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 80 | - name: Create Pull Request 81 | if: steps.check_pr.outputs.if_pr_exists == 'false' 82 | run: | 83 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 84 | gh pr create --title "Update aptos-protos to upstream branch ${{ github.event.client_payload.branch_name }}" \ 85 | --body "This PR updates aptos-protos to a new version." \ 86 | --base main \ 87 | --head "$branch_name" \ 88 | --label "indexer-sdk-update" 89 | env: 90 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | 3 | # Rust specific ignores 4 | # Please follow https://help.github.com/en/articles/ignoring-files to create a global 5 | # .gitignore file locally for IDE/Emacs/Vim generated files. 6 | **/target 7 | **/*.rs.bk 8 | .idea/ 9 | 10 | # macOS Specific ignores 11 | # General 12 | .DS_Store 13 | .AppleDouble 14 | .LSOverride 15 | 16 | # VSCode settings 17 | .vscode/ 18 | 19 | # Processor config 20 | config.yaml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Aptos Indexer SDK 2 | Generally, an indexer processor follows this flow: 3 | 4 | 1. Receive a stream of Aptos transactions 5 | 2. Extract data from the transactions 6 | 3. Transform and merge the parsed data into a coherent, standardized schema 7 | 4. Store the transformed data into a database 8 | 9 | The Aptos Indexer SDK works by modeling each processor as a graph of independent steps. Each of the steps in the flow above is written as a `Step` in the SDK, and the output of each `Step` is connected to the input of the next `Step` by a channel. 
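For orientation, here is a minimal sketch of what such a graph can look like once assembled. The `MyExtractorStep` and `MyStorerStep` types, the `transaction_stream_config` and `db_pool` values, and the exact `TransactionStreamStep` constructor shown here are illustrative placeholders; the trait implementations and builder calls they rely on are described in the sections below.

```rust
// 1. A step that receives a stream of Aptos transactions (provided by the SDK).
//    Constructor shown schematically; see the SDK docs for the real signature.
let transaction_stream = TransactionStreamStep::new(transaction_stream_config).await?;
// 2 & 3. A hypothetical step that extracts and transforms data from each transaction batch.
let extractor = MyExtractorStep::new();
// 4. A hypothetical step that writes the transformed data to a database.
let storer = MyStorerStep::new(db_pool);

// Each `connect_to` call creates a channel: the output of one step becomes the input of the next.
let (_builder, output_receiver) = ProcessorBuilder::new_with_inputless_first_step(
    transaction_stream.into_runnable_step(),
)
.connect_to(extractor.into_runnable_step(), 10)
.connect_to(storer.into_runnable_step(), 10)
.end_and_return_output_receiver(10);
```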
10 | 11 | # How to use 12 | 13 | To your `Cargo.toml`, add 14 | 15 | ```toml 16 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}" } 17 | aptos-indexer-processor-sdk-server-framework = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}" } 18 | ``` 19 | 20 | # Get started 21 | 22 | We’ve created a [Quickstart Guide to Aptos Indexer SDK](https://github.com/aptos-labs/aptos-indexer-processor-example) which gets you set up and running an events processor that indexes events on the Aptos blockchain. 23 | 24 | # Documentation 25 | 26 | ## Creating a step 27 | 28 | To create a step in the SDK, implement these traits: 29 | 30 | 1. **Processable** 31 | 32 | ```rust 33 | #[async_trait] 34 | impl Processable for MyExtractorStep { 35 | type Input = Transaction; 36 | type Output = ExtractedDataModel; 37 | type RunType = AsyncRunType; 38 | 39 | // Processes a batch of input items and returns a batch of output items. 40 | async fn process( 41 | &mut self, 42 | input: TransactionContext<Transaction>, 43 | ) -> Result<Option<TransactionContext<ExtractedDataModel>>, ProcessorError> { 44 | let extracted_data = ... 45 | // Extract data from input.data 46 | 47 | Ok(Some(TransactionContext { 48 | data: extracted_data, 49 | start_version: input.start_version, 50 | end_version: input.end_version, 51 | start_transaction_timestamp: input.start_transaction_timestamp, 52 | end_transaction_timestamp: input.end_transaction_timestamp, 53 | total_size_in_bytes: input.total_size_in_bytes, 54 | })) 55 | } 56 | } 57 | ``` 58 | 59 | 2. **NamedStep** 60 | 61 | ```rust 62 | impl NamedStep for MyExtractorStep { 63 | fn name(&self) -> String { 64 | "MyExtractorStep".to_string() 65 | } 66 | } 67 | ``` 68 | 69 | 3. Either **AsyncStep** or **PollableAsyncStep**, which defines how the step will be run in the processor. 70 | 1. The most basic step is an `AsyncStep`, which processes a batch of input items and returns a batch of output items. 71 | 72 | ```rust 73 | #[async_trait] 74 | impl Processable for MyExtractorStep { 75 | ... 76 | type RunType = AsyncRunType; 77 | ... 78 | } 79 | 80 | impl AsyncStep for MyExtractorStep {} 81 | ``` 82 | 83 | 2. A `PollableAsyncStep` does the same as `AsyncStep`, but it also periodically polls its internal state and returns a batch of output items if available. 84 | 85 | ```rust 86 | #[async_trait] 87 | impl<T> Processable for MyPollStep<T> 88 | where 89 | Self: Sized + Send + 'static, 90 | T: Send + 'static, 91 | { 92 | ... 93 | type RunType = PollableAsyncRunType; 94 | ... 95 | } 96 | 97 | #[async_trait] 98 | impl<T> PollableAsyncStep for MyPollStep<T> 99 | where 100 | Self: Sized + Send + Sync + 'static, 101 | T: Send + 'static, 102 | { 103 | /// Returns the duration between polls 104 | fn poll_interval(&self) -> std::time::Duration { 105 | // Define duration 106 | } 107 | 108 | /// Polls the internal state and returns a batch of output items if available. 109 | async fn poll(&mut self) -> Result<Option<Vec<TransactionContext<Self::Output>>>, ProcessorError> { 110 | // Define polling logic 111 | } 112 | } 113 | ``` 114 | 115 | 116 | ## Common steps 117 | 118 | The SDK provides several common steps to use in your processor. 119 | 120 | 1. `TransactionStreamStep` provides a stream of Aptos transactions to the processor 121 | 2. 
`TimedBufferStep` buffers a batch of items and periodically polls to release the items to the next step 122 | 123 | ## Connecting steps 124 | 125 | When `ProcessorBuilder` connects two steps, a channel is created linking the two steps and the output of the first step becomes the input of the next step. 126 | 127 | ```rust 128 | let (pb, buffer_receiver) = ProcessorBuilder::new_with_inputless_first_step( 129 | first_step.into_runnable_step(), 130 | ) 131 | .connect_to(second_step.into_runnable_step(), channel_size) 132 | .connect_to(third_step.into_runnable_step(), channel_size) 133 | .end_and_return_output_receiver(channel_size); 134 | ``` 135 | 136 | ## Adding a new processor 137 | 138 | 1. Use [aptos-indexer-processor-example](https://github.com/aptos-labs/aptos-indexer-processor-example) as a starting point 139 | 2. Add the new processor to [ProcessorConfig](https://github.com/aptos-labs/aptos-indexer-processor-example/blob/a8bbb23056d55b86b4ded6822c9120e5e8763d50/aptos-indexer-processor-example/src/config/processor_config.rs#L34) and [Processor](https://github.com/aptos-labs/aptos-indexer-processor-example/blob/a8bbb23056d55b86b4ded6822c9120e5e8763d50/aptos-indexer-processor-example/src/config/processor_config.rs#L58) 140 | 3. Add the processor to [RunnableConfig](https://github.com/aptos-labs/aptos-indexer-processor-example/blob/a8bbb23056d55b86b4ded6822c9120e5e8763d50/aptos-indexer-processor-example/src/config/indexer_processor_config.rs#L25) 141 | 142 | ## Running a processor 143 | 144 | To run the processor, we recommend using the example in [aptos-indexer-processor-example](https://github.com/aptos-labs/aptos-indexer-processor-example) and following this [configuration guide](https://github.com/aptos-labs/aptos-indexer-processor-example?tab=readme-ov-file#configuring-your-processor). 145 | 146 | ## Advanced features (experimental) 147 | 148 | 1. Fanout + ArcifyStep 149 | 2. Fan in 150 | 151 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xclippy = [ 3 | "clippy", 4 | "--workspace", 5 | "--all-targets", 6 | "--all-features", 7 | "--", 8 | "-Dwarnings", 9 | "-Wclippy::all", 10 | "-Aclippy::upper_case_acronyms", 11 | "-Aclippy::enum-variant-names", 12 | "-Aclippy::result-large-err", 13 | "-Aclippy::mutable-key-type", 14 | "-Aclippy::map_identity", # We temporarily ignore this due to: https://github.com/rust-lang/rust-clippy/issues/11764 15 | ] 16 | 17 | [build] 18 | rustflags = [ 19 | "--cfg", 20 | "tokio_unstable", 21 | "-C", 22 | "force-frame-pointers=yes", 23 | "-C", 24 | "force-unwind-tables=yes", 25 | ] 26 | 27 | # TODO(grao): Figure out whether we should enable other cpu features, and whether we should use a different way to configure them rather than list every single one here. 
28 | #[target.x86_64-unknown-linux-gnu] 29 | #rustflags = ["--cfg", "tokio_unstable", "-C", "link-arg=-fuse-ld=lld", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "target-feature=+sse4.2"] 30 | 31 | # 64 bit MSVC 32 | #[target.x86_64-pc-windows-msvc] 33 | #rustflags = ["--cfg", "tokio_unstable", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "link-arg=/STACK:8000000" # Set stack to 8 MB] 34 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | 4 | members = [ 5 | "instrumented-channel", 6 | "moving-average", 7 | "sample", 8 | "sdk", 9 | "transaction-stream", 10 | ] 11 | 12 | [workspace.package] 13 | authors = ["Aptos Labs "] 14 | edition = "2021" 15 | homepage = "https://aptoslabs.com" 16 | license = "Apache-2.0" 17 | publish = false 18 | repository = "https://github.com/aptos-labs/aptos-indexer-processor-sdk" 19 | rust-version = "1.78" 20 | 21 | [workspace.dependencies] 22 | aptos-indexer-processor-sdk = { path = "sdk" } 23 | aptos-indexer-transaction-stream = { path = "transaction-stream" } 24 | instrumented-channel = { path = "instrumented-channel" } 25 | aptos-moving-average = { path = "moving-average" } 26 | sample = { path = "sample" } 27 | 28 | ahash = { version = "0.8.7", features = ["serde"] } 29 | anyhow = "1.0.98" 30 | aptos-protos = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2dd9c73b27fdcbe78c7391fd43c9a5d00b93e686" } 31 | aptos-system-utils = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2dd9c73b27fdcbe78c7391fd43c9a5d00b93e686" } 32 | aptos-transaction-filter = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2dd9c73b27fdcbe78c7391fd43c9a5d00b93e686" } 33 | async-trait = "0.1.80" 34 | autometrics = { version = "1.0.1", features = ["prometheus-exporter"] } 35 | axum = "0.7.5" 36 | backtrace = "0.3.58" 37 | bcs = { git = "https://github.com/aptos-labs/bcs.git", rev = "d31fab9d81748e2594be5cd5cdf845786a30562d" } 38 | bigdecimal = { version = "0.4.0", features = ["serde"] } 39 | chrono = { version = "0.4.19", features = ["clock", "serde"] } 40 | clap = { version = "4.3.5", features = ["derive", "unstable-styles"] } 41 | # Do NOT enable the postgres feature here, it is conditionally enabled in a feature 42 | # block in the Cargo.toml file for the processor crate. 
43 | # https://github.com/aptos-labs/aptos-indexer-processors/pull/325 44 | diesel = { version = "=2.2.0", features = [ 45 | "chrono", 46 | "postgres_backend", 47 | "numeric", 48 | "serde_json", 49 | ] } 50 | # Use the crate version once this feature gets released on crates.io: 51 | # https://github.com/weiznich/diesel_async/commit/e165e8c96a6c540ebde2d6d7c52df5c5620a4bf1 52 | diesel-async = { git = "https://github.com/weiznich/diesel_async.git", rev = "e3beac66cd41ab53d78a10328bb72f272103e5d1", features = [ 53 | "async-connection-wrapper", 54 | "postgres", 55 | "bb8", 56 | "tokio", 57 | ] } 58 | diesel_migrations = { version = "2.1.0", features = ["postgres"] } 59 | delegate = "0.12.0" 60 | derive_builder = "0.20.0" 61 | field_count = "0.1.1" 62 | futures = "0.3.30" 63 | futures-util = "0.3.21" 64 | hex = "0.4.3" 65 | indexmap = { version = "2.7.0", features = ["serde"] } 66 | itertools = "0.13.0" 67 | 68 | # Locking this because newer versions of kanal are using the unstable feature error_in_core, which 69 | # will break the Aptos CLI. 70 | kanal = "=0.1.0-pre8" 71 | lazy_static = "1.4.0" 72 | mockall = "0.12.1" 73 | num_cpus = "1.16.0" 74 | once_cell = { version = "1.19.0" } 75 | petgraph = "0.6.5" 76 | prometheus = "0.13.3" 77 | prometheus-client = "0.22.2" 78 | prost = { version = "0.13.4", features = ["no-recursion-limit"] } 79 | rayon = "1.10.0" 80 | serde = { version = "1.0.193", features = ["derive", "rc"] } 81 | serde_json = { version = "1.0.81", features = ["preserve_order"] } 82 | serde_yaml = "0.8.24" 83 | sha2 = "0.9.3" 84 | strum = { version = "0.24.1", features = ["derive"] } 85 | tempfile = "3.3.0" 86 | testcontainers = "0.20.1" 87 | thiserror = "1.0.61" 88 | tiny-keccak = { version = "2.0.2", features = ["keccak", "sha3"] } 89 | tracing = "0.1.34" 90 | tokio = { version = "1.37.0", features = ["full"] } 91 | tokio-retry = { version = "0.3.0" } 92 | toml = "0.7.4" 93 | tonic = { version = "0.12.3", features = [ 94 | "tls", 95 | "tls-roots", 96 | "transport", 97 | "prost", 98 | "codegen", 99 | "zstd", 100 | ] } 101 | tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] } 102 | url = { version = "2.5.1", features = ["serde"] } 103 | 104 | # Postgres SSL support 105 | native-tls = "0.2.11" 106 | postgres-native-tls = "0.5.0" 107 | tokio-postgres = "0.7.10" 108 | tokio-stream = { version = "0.1.16", features = ["net"] } 109 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "instrumented-channel" 7 | version = "0.1.0" 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "instrumented-channel" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | delegate = { workspace = true } 16 | derive_builder = { workspace = true } 17 | kanal = { workspace = true } 18 | once_cell = { workspace = true } 19 | prometheus = { workspace = true } 20 | prometheus-client = { workspace = true } 21 | 22 | [dev-dependencies] 23 | tokio = { workspace = true } 24 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/src/channel_metrics.rs: -------------------------------------------------------------------------------- 1 | use derive_builder::Builder; 2 | use once_cell::sync::Lazy; 3 | use prometheus_client::{ 4 | encoding::EncodeLabelSet, 5 | metrics::{counter::Counter, family::Family, gauge::Gauge}, 6 | registry::Registry, 7 | }; 8 | use std::sync::atomic::AtomicU64; 9 | 10 | pub const METRICS_PREFIX: &str = "aptos_procsdk_channel_"; 11 | 12 | pub fn init_channel_metrics_registry(registry: &mut Registry) { 13 | registry.register( 14 | format!("{}_{}", METRICS_PREFIX, "sent_messages_count"), 15 | "Number of messages sent", 16 | SENT_MESSAGES_COUNT.clone(), 17 | ); 18 | 19 | registry.register( 20 | format!("{}_{}", METRICS_PREFIX, "received_messages_count"), 21 | "Number of messages received", 22 | RECEIVED_MESSAGES_COUNT.clone(), 23 | ); 24 | 25 | registry.register( 26 | format!("{}_{}", METRICS_PREFIX, "send_duration"), 27 | "Duration in seconds to send a message", 28 | SEND_DURATION.clone(), 29 | ); 30 | 31 | registry.register( 32 | format!("{}_{}", METRICS_PREFIX, "receive_duration"), 33 | "Duration in seconds to receive a message", 34 | RECEIVE_DURATION.clone(), 35 | ); 36 | 37 | registry.register( 38 | format!("{}_{}", METRICS_PREFIX, "failed_sends_count"), 39 | "Number of failed sends", 40 | FAILED_SENDS_COUNT.clone(), 41 | ); 42 | 43 | registry.register( 44 | format!("{}_{}", METRICS_PREFIX, "failed_receives_count"), 45 | "Number of failed receives", 46 | FAILED_RECEIVES_COUNT.clone(), 47 | ); 48 | 49 | registry.register( 50 | format!("{}_{}", METRICS_PREFIX, "channel_size"), 51 | "Size of the channel", 52 | CHANNEL_SIZE.clone(), 53 | ); 54 | } 55 | 56 | #[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] 57 | pub struct ChannelMetricLabels { 58 | pub output_of: String, 59 | } 60 | 61 | pub static SENT_MESSAGES_COUNT: Lazy> = 62 | Lazy::new(Family::::default); 63 | 64 | pub static RECEIVED_MESSAGES_COUNT: Lazy> = 65 | Lazy::new(Family::::default); 66 | 67 | pub static SEND_DURATION: Lazy>> = 68 | Lazy::new(Family::>::default); 69 | 70 | pub static RECEIVE_DURATION: Lazy>> = 71 | Lazy::new(Family::>::default); 72 | 73 | pub static FAILED_SENDS_COUNT: Lazy> = 74 | Lazy::new(Family::::default); 75 | 76 | pub static FAILED_RECEIVES_COUNT: Lazy> = 77 | Lazy::new(Family::::default); 78 | 79 | pub static CHANNEL_SIZE: Lazy> = 80 | Lazy::new(Family::::default); 81 | 82 
| #[derive(Builder, Clone)] 83 | pub struct ChannelMetrics { 84 | pub labels: ChannelMetricLabels, 85 | } 86 | 87 | impl ChannelMetrics { 88 | pub fn new(output_of: String) -> Self { 89 | Self { 90 | labels: ChannelMetricLabels { output_of }, 91 | } 92 | } 93 | } 94 | 95 | impl ChannelMetrics { 96 | pub fn inc_sent_messages_count(&self) -> &Self { 97 | SENT_MESSAGES_COUNT.get_or_create(&self.labels).inc(); 98 | self 99 | } 100 | 101 | pub fn inc_received_messages_count(&self) -> &Self { 102 | RECEIVED_MESSAGES_COUNT.get_or_create(&self.labels).inc(); 103 | self 104 | } 105 | 106 | pub fn inc_failed_sends_count(&self) -> &Self { 107 | FAILED_SENDS_COUNT.get_or_create(&self.labels).inc(); 108 | self 109 | } 110 | 111 | pub fn inc_failed_receives_count(&self) -> &Self { 112 | FAILED_RECEIVES_COUNT.get_or_create(&self.labels).inc(); 113 | self 114 | } 115 | 116 | pub fn log_send_duration(&self, duration: f64) -> &Self { 117 | SEND_DURATION.get_or_create(&self.labels).set(duration); 118 | self 119 | } 120 | 121 | pub fn log_receive_duration(&self, duration: f64) -> &Self { 122 | RECEIVE_DURATION.get_or_create(&self.labels).set(duration); 123 | self 124 | } 125 | 126 | pub fn log_channel_size(&self, size: u64) -> &Self { 127 | CHANNEL_SIZE.get_or_create(&self.labels).set(size as i64); 128 | self 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod channel_metrics; 2 | 3 | use channel_metrics::ChannelMetrics; 4 | use delegate::delegate; 5 | /** 6 | 7 | # Instrumented Channel 8 | This is a wrapper and abstraction over the kanal channel (for now), but it can be extended to support other channels as well. 9 | 10 | The main purpose of this crate is to provide a way to instrument the channel, so that we can track the number of messages sent and received, and the time taken to send and receive messages. 11 | 12 | ## Example 13 | ```rust 14 | use instrumented_channel::instrumented_bounded_channel; 15 | use tokio::time::{sleep, Duration}; 16 | 17 | #[tokio::main] 18 | async fn main() { 19 | let (sender, receiver) = instrumented_bounded_channel("channel_name", 10); 20 | sender.send(42).await.unwrap(); 21 | assert_eq!(receiver.recv().await.unwrap(), 42); 22 | } 23 | ``` 24 | **/ 25 | use kanal::{AsyncReceiver, AsyncSender, ReceiveError, SendError}; 26 | 27 | pub struct InstrumentedAsyncSender { 28 | pub(crate) sender: AsyncSender, 29 | // Metrics 30 | pub(crate) channel_metrics: channel_metrics::ChannelMetrics, 31 | } 32 | 33 | impl InstrumentedAsyncSender { 34 | // shared_send_impl methods 35 | delegate! 
{ 36 | to self.sender { 37 | pub fn is_disconnected(&self) -> bool; 38 | pub fn len(&self) -> usize; 39 | pub fn is_empty(&self) -> bool; 40 | pub fn is_full(&self) -> bool; 41 | pub fn capacity(&self); 42 | pub fn receiver_count(&self) -> u32; 43 | pub fn sender_count(&self) -> u32; 44 | pub fn close(&self) -> bool; 45 | pub fn is_closed(&self) -> bool; 46 | } 47 | } 48 | 49 | pub fn new(sender: AsyncSender, output_of: &str) -> Self { 50 | let channel_metrics = ChannelMetrics::new(output_of.to_string()); 51 | 52 | Self { 53 | sender, 54 | channel_metrics, 55 | } 56 | } 57 | 58 | pub async fn send(&'_ self, data: T) -> Result<(), SendError> { 59 | let send_start = std::time::Instant::now(); 60 | let res = self.sender.send(data).await; 61 | let send_duration = send_start.elapsed(); 62 | 63 | if res.is_err() { 64 | self.channel_metrics 65 | .log_send_duration(send_duration.as_secs_f64()) 66 | .log_channel_size(self.sender.len() as u64) 67 | .inc_failed_sends_count(); 68 | } else { 69 | self.channel_metrics 70 | .log_send_duration(send_duration.as_secs_f64()) 71 | .log_channel_size(self.sender.len() as u64) 72 | .inc_sent_messages_count(); 73 | } 74 | 75 | res 76 | } 77 | } 78 | 79 | impl Clone for InstrumentedAsyncSender { 80 | fn clone(&self) -> Self { 81 | Self { 82 | sender: self.sender.clone(), 83 | channel_metrics: self.channel_metrics.clone(), 84 | } 85 | } 86 | } 87 | 88 | pub struct InstrumentedAsyncReceiver { 89 | pub(crate) receiver: AsyncReceiver, 90 | // Metrics 91 | pub(crate) channel_metrics: ChannelMetrics, 92 | } 93 | 94 | impl InstrumentedAsyncReceiver { 95 | // shared_recv_impl methods 96 | delegate! { 97 | to self.receiver { 98 | pub fn is_disconnected(&self) -> bool; 99 | pub fn len(&self) -> usize; 100 | pub fn is_empty(&self) -> bool; 101 | pub fn is_full(&self) -> bool; 102 | pub fn capacity(&self); 103 | pub fn receiver_count(&self) -> u32; 104 | pub fn sender_count(&self) -> u32; 105 | pub fn close(&self) -> bool; 106 | pub fn is_closed(&self) -> bool; 107 | } 108 | } 109 | 110 | pub fn new(receiver: AsyncReceiver, output_of: &str) -> Self { 111 | let channel_metrics = ChannelMetrics::new(output_of.to_string()); 112 | Self { 113 | receiver, 114 | channel_metrics, 115 | } 116 | } 117 | 118 | pub async fn recv(&'_ self) -> Result { 119 | let receive_start = std::time::Instant::now(); 120 | let result = self.receiver.recv().await; 121 | let receive_duration = receive_start.elapsed(); 122 | 123 | if result.is_err() { 124 | self.channel_metrics 125 | .log_receive_duration(receive_duration.as_secs_f64()) 126 | .log_channel_size(self.receiver.len() as u64) 127 | .inc_failed_receives_count(); 128 | } else { 129 | self.channel_metrics 130 | .log_receive_duration(receive_duration.as_secs_f64()) 131 | .log_channel_size(self.receiver.len() as u64) 132 | .inc_received_messages_count(); 133 | } 134 | 135 | result 136 | } 137 | } 138 | 139 | impl Clone for InstrumentedAsyncReceiver { 140 | fn clone(&self) -> Self { 141 | Self { 142 | receiver: self.receiver.clone(), 143 | channel_metrics: self.channel_metrics.clone(), 144 | } 145 | } 146 | } 147 | 148 | pub fn instrumented_bounded_channel( 149 | output_of: &str, 150 | size: usize, 151 | ) -> (InstrumentedAsyncSender, InstrumentedAsyncReceiver) { 152 | let (sender, receiver) = kanal::bounded_async(size); 153 | ( 154 | InstrumentedAsyncSender::new(sender, output_of), 155 | InstrumentedAsyncReceiver::new(receiver, output_of), 156 | ) 157 | } 158 | 159 | pub fn instrumented_unbounded_channel( 160 | output_of: &str, 161 | ) -> 
(InstrumentedAsyncSender, InstrumentedAsyncReceiver) { 162 | let (sender, receiver) = kanal::unbounded_async(); 163 | ( 164 | InstrumentedAsyncSender::new(sender, output_of), 165 | InstrumentedAsyncReceiver::new(receiver, output_of), 166 | ) 167 | } 168 | 169 | #[cfg(test)] 170 | mod tests { 171 | use super::*; 172 | use prometheus::Encoder; 173 | 174 | fn gather_metrics_to_string() -> String { 175 | let metrics = prometheus::gather(); 176 | let mut buffer = vec![]; 177 | let encoder = prometheus::TextEncoder::new(); 178 | encoder.encode(&metrics, &mut buffer).unwrap(); 179 | String::from_utf8(buffer).unwrap() 180 | } 181 | #[tokio::test] 182 | #[allow(clippy::needless_return)] 183 | async fn test_instrumented_channel() { 184 | let (sender, receiver) = instrumented_bounded_channel("my_channel", 10); 185 | sender.send(42).await.unwrap(); 186 | sender.send(999).await.unwrap(); 187 | sender.send(3).await.unwrap(); 188 | assert_eq!(receiver.recv().await.unwrap(), 42); 189 | // TODO: check prometheus metrics 190 | let metrics = gather_metrics_to_string(); 191 | println!("{metrics}"); 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/moving-average/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-moving-average" 3 | description = "Utility to calculate moving average such as TPS" 4 | version = "0.1.0" 5 | 6 | # Workspace inherited keys 7 | authors = { workspace = true } 8 | edition = { workspace = true } 9 | homepage = { workspace = true } 10 | license = { workspace = true } 11 | publish = { workspace = true } 12 | repository = { workspace = true } 13 | rust-version = { workspace = true } 14 | 15 | [dependencies] 16 | chrono = { workspace = true } 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/moving-average/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![forbid(unsafe_code)] 5 | 6 | use std::collections::VecDeque; 7 | 8 | // TPS data 9 | pub struct MovingAverage { 10 | window_millis: u64, 11 | // (timestamp_millis, value) 12 | values: VecDeque<(u64, u64)>, 13 | sum: u64, 14 | } 15 | 16 | impl MovingAverage { 17 | pub fn new(window_millis: u64) -> Self { 18 | let now = chrono::Utc::now().naive_utc().and_utc().timestamp_millis() as u64; 19 | let mut queue = VecDeque::new(); 20 | queue.push_back((now, 0)); 21 | Self { 22 | window_millis, 23 | values: queue, 24 | sum: 0, 25 | } 26 | } 27 | 28 | pub fn tick_now(&mut self, value: u64) { 29 | let now = chrono::Utc::now().naive_utc().and_utc().timestamp_millis() as u64; 30 | self.tick(now, value); 31 | } 32 | 33 | pub fn tick(&mut self, timestamp_millis: u64, value: u64) -> f64 { 34 | self.values.push_back((timestamp_millis, value)); 35 | self.sum += value; 36 | while self.values.len() > 2 { 37 | match self.values.front() { 38 | None => break, 39 | Some((ts, val)) => { 40 | if timestamp_millis - ts > self.window_millis { 41 | self.sum -= val; 42 | self.values.pop_front(); 43 | } else { 44 | break; 45 | } 46 | }, 47 | } 48 | } 49 | self.avg() 50 | } 51 | 52 | // Only be called after tick_now/tick is called. 
53 | pub fn avg(&self) -> f64 { 54 | if self.values.len() < 2 { 55 | 0.0 56 | } else { 57 | let elapsed = self.values.back().unwrap().0 - self.values.front().unwrap().0; 58 | (self.sum * 1000) as f64 / elapsed as f64 59 | } 60 | } 61 | 62 | pub fn sum(&self) -> u64 { 63 | self.sum 64 | } 65 | } 66 | 67 | #[cfg(test)] 68 | mod test { 69 | use super::*; 70 | 71 | #[test] 72 | fn test_moving_average() { 73 | // 10 Second window. 74 | let mut ma = MovingAverage::new(10_000); 75 | // 9 seconds spent at 100 TPS. 76 | for _ in 0..9 { 77 | ma.tick_now(100); 78 | std::thread::sleep(std::time::Duration::from_secs(1)); 79 | } 80 | // No matter what algorithm we use, the average should be 99 at least. 81 | let avg = ma.avg(); 82 | assert!(avg >= 99.0, "Average is too low: {avg}"); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/rustfmt.toml: -------------------------------------------------------------------------------- 1 | combine_control_expr = false 2 | edition = "2021" 3 | imports_granularity = "Crate" 4 | format_macro_matchers = true 5 | group_imports = "One" 6 | hex_literal_case = "Upper" 7 | match_block_trailing_comma = true 8 | newline_style = "Unix" 9 | overflow_delimited_expr = true 10 | reorder_impl_items = true 11 | use_field_init_shorthand = true 12 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sample/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sample" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | tracing = { workspace = true } 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sample/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | sync::atomic::{AtomicU64, Ordering}, 3 | time::{Duration, SystemTime}, 4 | }; 5 | 6 | /// ## Sampling logs 7 | /// 8 | /// Sometimes logging a large amount of data is expensive. In order to log information only part 9 | /// of the time, we've added a `sample!` macro that's configurable on how often we want to execute some code. 10 | /// 11 | /// `SampleRate` determines how often the sampled statement will occur. 12 | /// 13 | /// ``` 14 | /// use sample::{sample, SampleRate, Sampling}; 15 | /// use std::time::Duration; 16 | /// use tracing::info; 17 | /// 18 | /// // Sampled based on frequency of events, log only every 2 logs 19 | /// sample!(SampleRate::Frequency(2), info!("Long log")); 20 | /// 21 | /// // Sampled based on time passed, log at most once a minute 22 | /// sample!(SampleRate::Duration(Duration::from_secs(60)), info!("Long log")); 23 | /// ``` 24 | /// The rate at which a `sample!` macro will run it's given function 25 | #[derive(Debug)] 26 | pub enum SampleRate { 27 | /// Only sample a single time during a window of time. This rate only has a resolution in 28 | /// seconds. 29 | Duration(Duration), 30 | /// Sample based on the frequency of the event. 
The provided u64 is the inverse of the 31 | /// frequency (1/x), for example Frequency(2) means that 1 out of every 2 events will be 32 | /// sampled (1/2). 33 | Frequency(u64), 34 | /// Always Sample 35 | Always, 36 | } 37 | 38 | /// An internal struct that can be checked if a sample is ready for the `sample!` macro 39 | pub struct Sampling { 40 | rate: SampleRate, 41 | state: AtomicU64, 42 | } 43 | 44 | impl Sampling { 45 | pub const fn new(rate: SampleRate) -> Self { 46 | Self { 47 | rate, 48 | state: AtomicU64::new(0), 49 | } 50 | } 51 | 52 | pub fn sample(&self) -> bool { 53 | match &self.rate { 54 | SampleRate::Duration(rate) => Self::sample_duration(rate, &self.state), 55 | SampleRate::Frequency(rate) => Self::sample_frequency(*rate, &self.state), 56 | SampleRate::Always => true, 57 | } 58 | } 59 | 60 | fn sample_frequency(rate: u64, count: &AtomicU64) -> bool { 61 | let previous_count = count 62 | .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |count| { 63 | let new_count = if count == 0 { 64 | rate.saturating_sub(1) 65 | } else { 66 | count.saturating_sub(1) 67 | }; 68 | Some(new_count) 69 | }) 70 | .expect("Closure should always returns 'Some'. This is a Bug."); 71 | 72 | previous_count == 0 73 | } 74 | 75 | fn sample_duration(rate: &Duration, last_sample: &AtomicU64) -> bool { 76 | let rate = rate.as_secs(); 77 | // Seconds since Unix Epoch 78 | let now = SystemTime::now() 79 | .duration_since(SystemTime::UNIX_EPOCH) 80 | .expect("SystemTime before UNIX EPOCH!") 81 | .as_secs(); 82 | 83 | last_sample 84 | .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |last_sample| { 85 | if now.saturating_sub(last_sample) >= rate { 86 | Some(now) 87 | } else { 88 | None 89 | } 90 | }) 91 | .is_ok() 92 | } 93 | } 94 | 95 | /// Samples a given function at a `SampleRate`, useful for periodically emitting logs or metrics on 96 | /// high throughput pieces of code. 97 | #[macro_export] 98 | macro_rules! 
sample { 99 | ($sample_rate:expr, $($args:expr)+ ,) => { 100 | $crate::sample!($sample_rate, $($args)+); 101 | }; 102 | 103 | ($sample_rate:expr, $($args:tt)+) => {{ 104 | static SAMPLING: $crate::Sampling = $crate::Sampling::new($sample_rate); 105 | if SAMPLING.sample() { 106 | $($args)+ 107 | } 108 | }}; 109 | } 110 | 111 | #[cfg(test)] 112 | mod tests { 113 | use super::*; 114 | 115 | #[test] 116 | fn frequency() { 117 | // Frequency 118 | let sampling = Sampling::new(SampleRate::Frequency(10)); 119 | let mut v = Vec::new(); 120 | for i in 0..=25 { 121 | if sampling.sample() { 122 | v.push(i); 123 | } 124 | } 125 | 126 | assert_eq!(v, vec![0, 10, 20]); 127 | } 128 | 129 | #[test] 130 | fn always() { 131 | // Always 132 | let sampling = Sampling::new(SampleRate::Always); 133 | let mut v = Vec::new(); 134 | for i in 0..5 { 135 | if sampling.sample() { 136 | v.push(i); 137 | } 138 | } 139 | 140 | assert_eq!(v, vec![0, 1, 2, 3, 4]); 141 | } 142 | 143 | #[ignore] 144 | #[test] 145 | fn duration() { 146 | // Duration 147 | let sampling = Sampling::new(SampleRate::Duration(Duration::from_secs(1))); 148 | let mut v = Vec::new(); 149 | for i in 0..5 { 150 | if sampling.sample() { 151 | v.push(i); 152 | } 153 | 154 | std::thread::sleep(Duration::from_millis(500)); 155 | } 156 | 157 | assert_eq!(v.len(), 2); 158 | } 159 | 160 | #[test] 161 | fn macro_expansion() { 162 | for i in 0..10 { 163 | sample!( 164 | SampleRate::Frequency(2), 165 | println!("loooooooooooooooooooooooooong hello {}", i), 166 | ); 167 | 168 | sample!(SampleRate::Frequency(2), { 169 | println!("hello {i}"); 170 | }); 171 | 172 | sample!(SampleRate::Frequency(2), println!("hello {i}")); 173 | 174 | sample! { 175 | SampleRate::Frequency(2), 176 | 177 | for j in 10..20 { 178 | println!("hello {j}"); 179 | } 180 | } 181 | } 182 | } 183 | 184 | #[test] 185 | fn threaded() { 186 | fn work() -> usize { 187 | let mut count = 0; 188 | 189 | for _ in 0..1000 { 190 | sample!(SampleRate::Frequency(5), count += 1); 191 | } 192 | 193 | count 194 | } 195 | 196 | let mut handles = Vec::new(); 197 | for _ in 0..10 { 198 | handles.push(std::thread::spawn(work)); 199 | } 200 | 201 | let mut count = 0; 202 | for handle in handles { 203 | count += handle.join().unwrap(); 204 | } 205 | 206 | assert_eq!(count, 2000); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-indexer-processor-sdk" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | ahash = { workspace = true } 16 | anyhow = { workspace = true } 17 | aptos-indexer-transaction-stream = { workspace = true } 18 | aptos-protos = { workspace = true } 19 | async-trait = { workspace = true } 20 | autometrics = { workspace = true } 21 | axum = { workspace = true } 22 | backtrace = { workspace = true } 23 | bcs = { workspace = true } 24 | bigdecimal = { workspace = true } 25 | chrono = { workspace = true } 26 | clap = { workspace = true } 27 | derive_builder = { workspace = true } 28 | diesel = { workspace = true, optional = true } 29 | diesel-async = { workspace = true, optional = true } 30 | 
diesel_migrations = { workspace = true, optional = true } 31 | field_count = { workspace = true, optional = true } 32 | futures = { workspace = true } 33 | futures-util = { workspace = true } 34 | hex = { workspace = true } 35 | indexmap = { workspace = true } 36 | instrumented-channel = { workspace = true } 37 | kanal = { workspace = true } 38 | mockall = { workspace = true } 39 | native-tls = { workspace = true, optional = true } 40 | num_cpus = { workspace = true } 41 | once_cell = { workspace = true } 42 | petgraph = { workspace = true } 43 | postgres-native-tls = { workspace = true, optional = true } 44 | prometheus = { workspace = true } 45 | prometheus-client = { workspace = true } 46 | serde = { workspace = true } 47 | serde_json = { workspace = true } 48 | serde_yaml = { workspace = true } 49 | sha2 = { workspace = true } 50 | tempfile = { workspace = true } 51 | testcontainers = { workspace = true, optional = true } 52 | thiserror = { workspace = true } 53 | tiny-keccak = { workspace = true } 54 | tokio = { workspace = true } 55 | tokio-postgres = { workspace = true, optional = true } 56 | tokio-retry = { workspace = true, optional = true } 57 | tokio-stream = { workspace = true, optional = true } 58 | toml = { workspace = true } 59 | tonic = { workspace = true, optional = true } 60 | tracing = { workspace = true } 61 | tracing-subscriber = { workspace = true } 62 | url = { workspace = true } 63 | 64 | [target.'cfg(target_os = "linux")'.dependencies] 65 | aptos-system-utils = { workspace = true } 66 | 67 | [features] 68 | postgres_partial = [ 69 | "diesel", 70 | "diesel-async", 71 | "diesel_migrations", 72 | "field_count", 73 | "postgres-native-tls", 74 | "native-tls", 75 | "tokio-postgres", 76 | ] 77 | # When using the postgres_full features we enable the diesel/postgres feature. We configure 78 | # it in a feature so the CLI can opt out, since it cannot tolerate the libpq dep. 79 | # Recall that features should always be additive. 80 | postgres_full = ["postgres_partial", "diesel/postgres"] 81 | testing_framework = ["testcontainers", "tonic", "tokio-retry", "tokio-stream"] 82 | default = [] 83 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/builder/dag.rs: -------------------------------------------------------------------------------- 1 | use crate::traits::{RunnableStep, RunnableStepWithInputReceiver}; 2 | use tokio::task::JoinHandle; 3 | 4 | pub fn connect_two_steps( 5 | left_step: RunnableStepWithInputReceiver, 6 | right_step: RightStep, 7 | channel_size: usize, 8 | ) -> ( 9 | JoinHandle<()>, 10 | RunnableStepWithInputReceiver, 11 | ) 12 | where 13 | LeftInput: Send + 'static, 14 | LeftOutput: Send + 'static, 15 | RightOutput: Send + 'static, 16 | LeftStep: RunnableStep + Send + Sized + 'static, 17 | RightStep: RunnableStep + Send + Sized + 'static, 18 | { 19 | let RunnableStepWithInputReceiver { 20 | input_receiver: left_input_receiver, 21 | _input_sender: _left_input_sender, 22 | step: left_step, 23 | .. 
24 | } = left_step; 25 | 26 | let (left_output_receiver, left_handle) = left_step.spawn( 27 | Some(left_input_receiver.clone()), 28 | channel_size, 29 | _left_input_sender, 30 | ); 31 | 32 | let right_step_with_input_receiver = 33 | RunnableStepWithInputReceiver::new(left_output_receiver, right_step); 34 | 35 | (left_handle, right_step_with_input_receiver) 36 | } 37 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/builder/mod.rs: -------------------------------------------------------------------------------- 1 | mod dag; 2 | mod processor_builder; 3 | 4 | pub use processor_builder::ProcessorBuilder; 5 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/arcify_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{async_step::AsyncRunType, AsyncStep, NamedStep, Processable}, 3 | types::transaction_context::TransactionContext, 4 | utils::errors::ProcessorError, 5 | }; 6 | use std::{marker::PhantomData, sync::Arc}; 7 | 8 | pub struct ArcifyStep 9 | where 10 | Self: Sized + Send + 'static, 11 | { 12 | _marker: PhantomData, 13 | } 14 | 15 | impl ArcifyStep { 16 | pub fn new() -> Self { 17 | Self { 18 | _marker: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | impl Default for ArcifyStep { 24 | fn default() -> Self { 25 | Self::new() 26 | } 27 | } 28 | 29 | #[async_trait::async_trait] 30 | impl Processable for ArcifyStep 31 | where 32 | T: Send + Sync + 'static, 33 | { 34 | type Input = Vec; 35 | type Output = Vec>; 36 | type RunType = AsyncRunType; 37 | 38 | async fn process( 39 | &mut self, 40 | item: TransactionContext>, 41 | ) -> Result>>>, ProcessorError> { 42 | Ok(Some(TransactionContext { 43 | data: item.data.into_iter().map(Arc::new).collect(), 44 | metadata: item.metadata, 45 | })) 46 | } 47 | } 48 | 49 | impl AsyncStep for ArcifyStep where T: Send + Sync + 'static {} 50 | 51 | impl NamedStep for ArcifyStep 52 | where 53 | T: Send + Sync + 'static, 54 | { 55 | fn name(&self) -> String { 56 | format!("Arcify<{}>", std::any::type_name::()) 57 | } 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | use crate::types::transaction_context::TransactionMetadata; 64 | 65 | fn generate_transaction_context() -> TransactionContext> { 66 | TransactionContext { 67 | data: vec![1, 2, 3], 68 | metadata: TransactionMetadata { 69 | start_version: 0, 70 | end_version: 0, 71 | start_transaction_timestamp: None, 72 | end_transaction_timestamp: None, 73 | total_size_in_bytes: 0, 74 | }, 75 | } 76 | } 77 | 78 | #[tokio::test] 79 | #[allow(clippy::needless_return)] 80 | async fn test_arcify_step_process() { 81 | let mut step = ArcifyStep::::new(); 82 | let input = generate_transaction_context(); 83 | 84 | let result = step.process(input).await.unwrap().unwrap(); 85 | assert_eq!(result.data.len(), 3); 86 | assert_eq!(*result.data[0], 1); 87 | assert_eq!(*result.data[1], 2); 88 | assert_eq!(*result.data[2], 3); 89 | } 90 | 91 | #[tokio::test] 92 | #[allow(clippy::needless_return)] 93 | async fn test_arcify_strong_count() { 94 | let mut step = ArcifyStep::::new(); 95 | let input = generate_transaction_context(); 96 | 97 | let result = step.process(input).await.unwrap().unwrap(); 98 | assert_eq!(Arc::strong_count(&result.data[0]), 1); 99 | 100 | let arc_clone = result.data[0].clone(); 101 | assert_eq!(Arc::strong_count(&arc_clone), 2); 102 | 103 | drop(arc_clone); 104 | 
assert_eq!(Arc::strong_count(&result.data[0]), 1); 105 | } 106 | 107 | #[tokio::test] 108 | #[allow(clippy::needless_return)] 109 | async fn test_arcify_ptr_eq() { 110 | let mut step = ArcifyStep::::new(); 111 | let input = generate_transaction_context(); 112 | 113 | let result = step.process(input).await.unwrap().unwrap(); 114 | let arc_clone = result.data[0].clone(); 115 | assert!(Arc::ptr_eq(&result.data[0], &arc_clone)); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod arcify_step; 2 | pub mod order_by_version_step; 3 | pub mod timed_buffer_step; 4 | pub mod transaction_stream_step; 5 | pub mod version_tracker_step; 6 | pub mod write_rate_limit_step; 7 | 8 | // Re-export the steps 9 | pub use arcify_step::ArcifyStep; 10 | pub use order_by_version_step::OrderByVersionStep; 11 | pub use timed_buffer_step::TimedBufferStep; 12 | pub use transaction_stream_step::TransactionStreamStep; 13 | pub use version_tracker_step::{ 14 | ProcessorStatusSaver, VersionTrackerStep, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS, 15 | }; 16 | pub use write_rate_limit_step::{Sizeable, WriteRateLimitConfig, WriteRateLimitStep}; 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/order_by_version_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use ahash::AHashMap; 9 | use anyhow::Result; 10 | use async_trait::async_trait; 11 | use std::time::Duration; 12 | 13 | /// OrderByVersionStep is a step that orders TransactionContexts by their starting versions. 14 | /// It buffers ordered TransactionContexts and releases them at every poll_interval. 
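/// /// For example (an illustrative scenario mirroring the unit test below): if a batch covering versions 100..=199 arrives before the batch covering versions 0..=99, the step parks the 100..=199 batch in `unordered_versions` until the 0..=99 batch fills the gap, then releases both batches in version order on the next poll.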
15 | pub struct OrderByVersionStep 16 | where 17 | Self: Sized + Send + 'static, 18 | Input: Send + 'static, 19 | { 20 | pub ordered_versions: Vec>, 21 | pub unordered_versions: AHashMap>, 22 | pub expected_next_version: u64, 23 | // Duration to poll and return the ordered versions 24 | pub poll_interval: Duration, 25 | } 26 | 27 | impl OrderByVersionStep 28 | where 29 | Self: Sized + Send + 'static, 30 | Input: Send + 'static, 31 | { 32 | pub fn new(starting_version: u64, poll_interval: Duration) -> Self { 33 | Self { 34 | ordered_versions: Vec::new(), 35 | unordered_versions: AHashMap::new(), 36 | expected_next_version: starting_version, 37 | poll_interval, 38 | } 39 | } 40 | 41 | fn update_ordered_versions(&mut self) { 42 | // While there are batches in unordered_versions that are in order, add them to ordered_versions 43 | while let Some(batch) = self 44 | .unordered_versions 45 | .remove(&(self.expected_next_version)) 46 | { 47 | self.expected_next_version = batch.metadata.end_version + 1; 48 | self.ordered_versions.push(batch); 49 | } 50 | } 51 | } 52 | 53 | #[async_trait] 54 | impl Processable for OrderByVersionStep 55 | where 56 | Input: Send + Sync + 'static, 57 | { 58 | type Input = Input; 59 | type Output = Input; 60 | type RunType = PollableAsyncRunType; 61 | 62 | async fn process( 63 | &mut self, 64 | current_batch: TransactionContext, 65 | ) -> Result>, ProcessorError> { 66 | // If there's a gap in the expected_next_version and current_version 67 | // have the current_version to unordered_versions for later processing. 68 | if self.expected_next_version != current_batch.metadata.start_version { 69 | tracing::debug!( 70 | next_version = self.expected_next_version, 71 | step = self.name(), 72 | "Gap detected starting from version: {}", 73 | current_batch.metadata.start_version 74 | ); 75 | self.unordered_versions 76 | .insert(current_batch.metadata.start_version, current_batch); 77 | } else { 78 | tracing::debug!("No gap detected"); 79 | self.expected_next_version = current_batch.metadata.end_version + 1; 80 | self.ordered_versions.push(current_batch); 81 | 82 | // If the current_versions is the expected_next_version, update the ordered_versions 83 | self.update_ordered_versions(); 84 | } 85 | 86 | // TODO: Consider adding a metric for the number of unordered_versions for debugging, performance tesing 87 | 88 | // Pass through 89 | Ok(None) // No immediate output 90 | } 91 | 92 | // Once polling ends, release the remaining ordered items in buffer 93 | async fn cleanup( 94 | &mut self, 95 | ) -> Result>>, ProcessorError> { 96 | Ok(Some(std::mem::take(&mut self.ordered_versions))) 97 | } 98 | } 99 | 100 | #[async_trait] 101 | impl PollableAsyncStep for OrderByVersionStep { 102 | fn poll_interval(&self) -> Duration { 103 | self.poll_interval 104 | } 105 | 106 | async fn poll(&mut self) -> Result>>, ProcessorError> { 107 | Ok(Some(std::mem::take(&mut self.ordered_versions))) 108 | } 109 | } 110 | 111 | impl NamedStep for OrderByVersionStep { 112 | // TODO: oncecell this somehow? Likely in wrapper struct... 
113 | fn name(&self) -> String { 114 | format!("OrderByVersionStep: {}", std::any::type_name::()) 115 | } 116 | } 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | use super::*; 121 | use crate::{ 122 | builder::ProcessorBuilder, 123 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 124 | traits::{IntoRunnableStep, RunnableStepWithInputReceiver}, 125 | types::transaction_context::TransactionMetadata, 126 | }; 127 | use instrumented_channel::instrumented_bounded_channel; 128 | 129 | fn generate_unordered_transaction_contexts() -> Vec> { 130 | vec![ 131 | TransactionContext { 132 | data: (), 133 | metadata: TransactionMetadata { 134 | start_version: 100, 135 | end_version: 199, 136 | start_transaction_timestamp: None, 137 | end_transaction_timestamp: None, 138 | total_size_in_bytes: 0, 139 | }, 140 | }, 141 | TransactionContext { 142 | data: (), 143 | metadata: TransactionMetadata { 144 | start_version: 0, 145 | end_version: 99, 146 | start_transaction_timestamp: None, 147 | end_transaction_timestamp: None, 148 | total_size_in_bytes: 0, 149 | }, 150 | }, 151 | ] 152 | } 153 | 154 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 155 | #[allow(clippy::needless_return)] 156 | async fn test_order_step() { 157 | // Setup 158 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 159 | let input_step = RunnableStepWithInputReceiver::new( 160 | input_receiver, 161 | PassThroughStep::default().into_runnable_step(), 162 | ); 163 | let order_step = OrderByVersionStep::<()>::new(0, Duration::from_millis(250)); 164 | 165 | let (_pb, mut output_receiver) = 166 | ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 167 | .connect_to(order_step.into_runnable_step(), 5) 168 | .end_and_return_output_receiver(5); 169 | 170 | let unordered_transaction_contexts = generate_unordered_transaction_contexts(); 171 | let mut ordered_transaction_contexts = unordered_transaction_contexts.clone(); 172 | ordered_transaction_contexts.sort(); 173 | 174 | for transaction_context in unordered_transaction_contexts { 175 | input_sender.send(transaction_context).await.unwrap(); 176 | } 177 | tokio::time::sleep(Duration::from_millis(500)).await; 178 | 179 | for ordered_transaction_context in ordered_transaction_contexts { 180 | let result = receive_with_timeout(&mut output_receiver, 100) 181 | .await 182 | .unwrap(); 183 | assert_eq!( 184 | result.metadata.start_version, 185 | ordered_transaction_context.metadata.start_version 186 | ); 187 | } 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/timed_buffer_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use anyhow::Result; 9 | use async_trait::async_trait; 10 | use std::time::Duration; 11 | 12 | pub struct TimedBufferStep 13 | where 14 | Self: Sized + Send + 'static, 15 | Input: Send + 'static, 16 | { 17 | pub internal_buffer: Vec>, 18 | pub poll_interval: Duration, 19 | } 20 | 21 | impl TimedBufferStep 22 | where 23 | Self: Sized + Send + 'static, 24 | Input: Send + 'static, 25 | { 26 | #[allow(dead_code)] 27 | pub fn new(poll_interval: Duration) -> Self { 28 | Self { 29 | internal_buffer: Vec::new(), 30 | poll_interval, 31 
| } 32 | } 33 | } 34 | 35 | #[async_trait] 36 | impl Processable for TimedBufferStep 37 | where 38 | Input: Send + Sync + 'static, 39 | { 40 | type Input = Input; 41 | type Output = Input; 42 | type RunType = PollableAsyncRunType; 43 | 44 | async fn process( 45 | &mut self, 46 | item: TransactionContext, 47 | ) -> Result>, ProcessorError> { 48 | self.internal_buffer.push(item); 49 | Ok(None) // No immediate output 50 | } 51 | 52 | // Once polling ends, release the remaining items in buffer 53 | async fn cleanup( 54 | &mut self, 55 | ) -> Result>>, ProcessorError> { 56 | Ok(Some(std::mem::take(&mut self.internal_buffer))) 57 | } 58 | } 59 | 60 | #[async_trait] 61 | impl PollableAsyncStep for TimedBufferStep { 62 | fn poll_interval(&self) -> Duration { 63 | self.poll_interval 64 | } 65 | 66 | async fn poll(&mut self) -> Result>>, ProcessorError> { 67 | Ok(Some(std::mem::take(&mut self.internal_buffer))) 68 | } 69 | } 70 | 71 | impl NamedStep for TimedBufferStep { 72 | // TODO: oncecell this somehow? Likely in wrapper struct... 73 | fn name(&self) -> String { 74 | format!("TimedBuffer: {}", std::any::type_name::()) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/transaction_stream_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{NamedStep, PollableAsyncRunType, PollableAsyncStep, Processable}, 3 | types::transaction_context::{TransactionContext, TransactionMetadata}, 4 | utils::errors::ProcessorError, 5 | }; 6 | use anyhow::Result; 7 | use aptos_indexer_transaction_stream::{ 8 | TransactionStream as TransactionStreamInternal, TransactionStreamConfig, 9 | }; 10 | use aptos_protos::transaction::v1::Transaction; 11 | use async_trait::async_trait; 12 | use mockall::mock; 13 | use std::time::Duration; 14 | use tokio::sync::Mutex; 15 | use tracing::{error, info, warn}; 16 | 17 | // TransactionStreamStep establishes a gRPC connection with Transaction Stream, 18 | // fetches transactions, and outputs them for processing. It also handles reconnections with retries. 19 | // This is usually the initial step in a processor.
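// // A minimal wiring sketch (illustrative only; the full, working version is in `basic_processor_function.rs` later in this crate). It assumes a `transaction_stream_config: TransactionStreamConfig` has already been loaded from the processor config: // //     let transaction_stream = TransactionStreamStep::new(transaction_stream_config).await?; //     let builder = ProcessorBuilder::new_with_inputless_first_step( //         transaction_stream.into_runnable_step(), //     ); //     // ...then chain `.connect_to(...)` for downstream steps and finish with //     // `.end_and_return_output_receiver(...)`.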
20 | pub struct TransactionStreamStep 21 | where 22 | Self: Sized + Send + 'static, 23 | { 24 | transaction_stream_config: TransactionStreamConfig, 25 | pub transaction_stream: Mutex, 26 | } 27 | 28 | impl TransactionStreamStep 29 | where 30 | Self: Sized + Send + 'static, 31 | { 32 | pub async fn new( 33 | transaction_stream_config: TransactionStreamConfig, 34 | ) -> Result { 35 | let transaction_stream_res = 36 | TransactionStreamInternal::new(transaction_stream_config.clone()).await; 37 | match transaction_stream_res { 38 | Err(e) => Err(ProcessorError::StepInitError { 39 | message: format!("Error creating transaction stream: {e:?}"), 40 | }), 41 | Ok(transaction_stream) => Ok(Self { 42 | transaction_stream: Mutex::new(transaction_stream), 43 | transaction_stream_config, 44 | }), 45 | } 46 | } 47 | } 48 | 49 | #[async_trait] 50 | impl Processable for TransactionStreamStep 51 | where 52 | Self: Sized + Send + 'static, 53 | { 54 | type Input = (); 55 | // The TransactionStreamStep will output a batch of transactions for processing 56 | type Output = Vec; 57 | type RunType = PollableAsyncRunType; 58 | 59 | async fn process( 60 | &mut self, 61 | _item: TransactionContext<()>, 62 | ) -> Result>>, ProcessorError> { 63 | Ok(None) 64 | } 65 | } 66 | 67 | #[async_trait] 68 | impl PollableAsyncStep for TransactionStreamStep 69 | where 70 | Self: Sized + Send + Sync + 'static, 71 | { 72 | fn poll_interval(&self) -> std::time::Duration { 73 | Duration::from_secs(0) 74 | } 75 | 76 | async fn poll( 77 | &mut self, 78 | ) -> Result>>>, ProcessorError> { 79 | let txn_pb_response_res = self 80 | .transaction_stream 81 | .lock() 82 | .await 83 | .get_next_transaction_batch() 84 | .await; 85 | match txn_pb_response_res { 86 | Ok(txn_pb_response) => { 87 | let transactions_with_context = TransactionContext { 88 | data: txn_pb_response.transactions, 89 | metadata: TransactionMetadata { 90 | start_version: txn_pb_response.start_version, 91 | end_version: txn_pb_response.end_version, 92 | start_transaction_timestamp: txn_pb_response.start_txn_timestamp, 93 | end_transaction_timestamp: txn_pb_response.end_txn_timestamp, 94 | total_size_in_bytes: txn_pb_response.size_in_bytes, 95 | }, 96 | }; 97 | Ok(Some(vec![transactions_with_context])) 98 | }, 99 | Err(e) => { 100 | warn!( 101 | stream_address = self.transaction_stream_config.indexer_grpc_data_service_address.to_string(), 102 | error = ?e, 103 | "Error fetching transactions from TransactionStream. Attempting to reconnect." 104 | ); 105 | 106 | // TransactionStream closes connections every 5 minutes. We should try to reconnect 107 | match self 108 | .transaction_stream 109 | .lock() 110 | .await 111 | .reconnect_to_grpc_with_retries() 112 | .await 113 | { 114 | Ok(_) => { 115 | info!( 116 | stream_address = self 117 | .transaction_stream_config 118 | .indexer_grpc_data_service_address 119 | .to_string(), 120 | "Successfully reconnected to TransactionStream." 121 | ); 122 | // Return nothing for now. The next poll will fetch the next batch of transactions. 123 | Ok(None) 124 | }, 125 | Err(e) => { 126 | error!( 127 | stream_address = self.transaction_stream_config 128 | .indexer_grpc_data_service_address 129 | .to_string(), 130 | error = ?e, 131 | " Error reconnecting transaction stream." 
132 | ); 133 | Err(ProcessorError::PollError { 134 | message: format!("Error reconnecting to TransactionStream: {e:?}"), 135 | }) 136 | }, 137 | } 138 | }, 139 | } 140 | } 141 | 142 | async fn should_continue_polling(&mut self) -> bool { 143 | let is_end = self.transaction_stream.lock().await.is_end_of_stream(); 144 | if is_end { 145 | info!("Reached ending version"); 146 | } 147 | !is_end 148 | } 149 | } 150 | 151 | impl NamedStep for TransactionStreamStep { 152 | fn name(&self) -> String { 153 | "TransactionStreamStep".to_string() 154 | } 155 | } 156 | 157 | mock! { 158 | pub TransactionStreamStep {} 159 | 160 | #[async_trait] 161 | impl Processable for TransactionStreamStep 162 | where Self: Sized + Send + 'static, 163 | { 164 | type Input = (); 165 | type Output = Vec; 166 | type RunType = PollableAsyncRunType; 167 | 168 | async fn init(&mut self); 169 | 170 | async fn process(&mut self, _item: TransactionContext<()> ) -> Result>>, ProcessorError>; 171 | } 172 | 173 | #[async_trait] 174 | impl PollableAsyncStep for TransactionStreamStep 175 | where 176 | Self: Sized + Send + 'static, 177 | { 178 | fn poll_interval(&self) -> std::time::Duration; 179 | 180 | // async fn poll(&mut self) -> Option> { 181 | // // Testing framework can provide mocked transactions here 182 | // Some(vec![TransactionsPBResponse { 183 | // transactions: vec![], 184 | // chain_id: 0, 185 | // start_version: 0, 186 | // end_version: 100, 187 | // start_txn_timestamp: None, 188 | // end_txn_timestamp: None, 189 | // size_in_bytes: 10, 190 | // }]) 191 | // } 192 | async fn poll(&mut self) -> Result>>>, ProcessorError>; 193 | 194 | async fn should_continue_polling(&mut self) -> bool; 195 | } 196 | 197 | impl NamedStep for TransactionStreamStep { 198 | fn name(&self) -> String; 199 | } 200 | } 201 | 202 | #[cfg(test)] 203 | mod tests { 204 | use super::*; 205 | use crate::{ 206 | builder::ProcessorBuilder, 207 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 208 | traits::IntoRunnableStep, 209 | types::transaction_context::TransactionMetadata, 210 | }; 211 | use mockall::Sequence; 212 | use std::time::Duration; 213 | 214 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 215 | #[allow(clippy::needless_return)] 216 | async fn test_transaction_stream() { 217 | let mut mock_transaction_stream = MockTransactionStreamStep::new(); 218 | // Testing framework can provide mocked transactions here 219 | mock_transaction_stream.expect_poll().returning(|| { 220 | Ok(Some(vec![TransactionContext { 221 | data: vec![Transaction::default()], 222 | metadata: TransactionMetadata { 223 | start_version: 0, 224 | end_version: 100, 225 | start_transaction_timestamp: None, 226 | end_transaction_timestamp: None, 227 | total_size_in_bytes: 10, 228 | }, 229 | }])) 230 | }); 231 | mock_transaction_stream 232 | .expect_poll_interval() 233 | .returning(|| Duration::from_secs(0)); 234 | mock_transaction_stream.expect_init().returning(|| { 235 | // Do nothing 236 | }); 237 | mock_transaction_stream 238 | .expect_name() 239 | .returning(|| "MockTransactionStream".to_string()); 240 | 241 | // Set up the mock transaction stream to poll 3 times 242 | let mut seq = Sequence::new(); 243 | mock_transaction_stream 244 | .expect_should_continue_polling() 245 | .times(3) 246 | .in_sequence(&mut seq) 247 | .return_const(true); 248 | mock_transaction_stream 249 | .expect_should_continue_polling() 250 | .return_const(false); 251 | 252 | let pass_through_step = PassThroughStep::default(); 253 | 254 | let (_, mut 
output_receiver) = ProcessorBuilder::new_with_inputless_first_step( 255 | mock_transaction_stream.into_runnable_step(), 256 | ) 257 | .connect_to(pass_through_step.into_runnable_step(), 5) 258 | .end_and_return_output_receiver(5); 259 | 260 | tokio::time::sleep(Duration::from_millis(250)).await; 261 | for _ in 0..3 { 262 | let result = receive_with_timeout(&mut output_receiver, 100) 263 | .await 264 | .unwrap(); 265 | 266 | assert_eq!(result.data.len(), 1); 267 | } 268 | 269 | // After receiving 3 outputs, the channel should be empty 270 | let result = receive_with_timeout(&mut output_receiver, 100).await; 271 | assert!(result.is_none()); 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/version_tracker_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use anyhow::Result; 9 | use async_trait::async_trait; 10 | use std::marker::PhantomData; 11 | 12 | pub const DEFAULT_UPDATE_PROCESSOR_STATUS_SECS: u64 = 1; 13 | 14 | /// The `ProcessorStatusSaver` trait object should be implemented in order to save the latest successfully 15 | /// processed transaction version to storage. I.e., persisting the `processor_status` to storage. 16 | #[async_trait] 17 | pub trait ProcessorStatusSaver { 18 | // T represents the transaction type that the processor is tracking. 19 | async fn save_processor_status( 20 | &self, 21 | last_success_batch: &TransactionContext<()>, 22 | ) -> Result<(), ProcessorError>; 23 | } 24 | 25 | /// Tracks the versioned processing of sequential transactions, ensuring no gaps 26 | /// occur between them. 27 | /// 28 | /// Important: this step assumes ordered transactions. Please use the `OrderByVersionStep` before this step 29 | /// if the transactions are not ordered. 30 | pub struct VersionTrackerStep 31 | where 32 | Self: Sized + Send + 'static, 33 | T: Send + 'static, 34 | S: ProcessorStatusSaver + Send + 'static, 35 | { 36 | // Last successful batch of sequentially processed transactions. Includes metadata to write to storage.
37 | last_success_batch: Option>, 38 | polling_interval_secs: u64, 39 | processor_status_saver: S, 40 | _marker: PhantomData, 41 | } 42 | 43 | impl VersionTrackerStep 44 | where 45 | Self: Sized + Send + 'static, 46 | T: Send + 'static, 47 | S: ProcessorStatusSaver + Send + 'static, 48 | { 49 | pub fn new(processor_status_saver: S, polling_interval_secs: u64) -> Self { 50 | Self { 51 | last_success_batch: None, 52 | processor_status_saver, 53 | polling_interval_secs, 54 | _marker: PhantomData, 55 | } 56 | } 57 | 58 | async fn save_processor_status(&mut self) -> Result<(), ProcessorError> { 59 | if let Some(last_success_batch) = self.last_success_batch.as_ref() { 60 | self.processor_status_saver 61 | .save_processor_status(last_success_batch) 62 | .await 63 | } else { 64 | Ok(()) 65 | } 66 | } 67 | } 68 | 69 | #[async_trait] 70 | impl Processable for VersionTrackerStep 71 | where 72 | Self: Sized + Send + 'static, 73 | T: Send + 'static, 74 | S: ProcessorStatusSaver + Send + 'static, 75 | { 76 | type Input = T; 77 | type Output = T; 78 | type RunType = PollableAsyncRunType; 79 | 80 | async fn process( 81 | &mut self, 82 | current_batch: TransactionContext, 83 | ) -> Result>, ProcessorError> { 84 | // If there's a gap in version, return an error 85 | if let Some(last_success_batch) = self.last_success_batch.as_ref() { 86 | if last_success_batch.metadata.end_version + 1 != current_batch.metadata.start_version { 87 | return Err(ProcessorError::ProcessError { 88 | message: format!( 89 | "Gap detected starting from version: {}", 90 | current_batch.metadata.start_version 91 | ), 92 | }); 93 | } 94 | } 95 | 96 | // Update the last success batch 97 | self.last_success_batch = Some(TransactionContext { 98 | data: (), 99 | metadata: current_batch.metadata.clone(), 100 | }); 101 | 102 | // Pass through 103 | Ok(Some(current_batch)) 104 | } 105 | 106 | async fn cleanup( 107 | &mut self, 108 | ) -> Result>>, ProcessorError> { 109 | // If processing or polling ends, save the last successful batch to the database. 110 | self.save_processor_status().await?; 111 | Ok(None) 112 | } 113 | } 114 | 115 | #[async_trait] 116 | impl PollableAsyncStep for VersionTrackerStep 117 | where 118 | Self: Sized + Send + Sync + 'static, 119 | T: Send + Sync + 'static, 120 | S: ProcessorStatusSaver + Send + Sync + 'static, 121 | { 122 | fn poll_interval(&self) -> std::time::Duration { 123 | std::time::Duration::from_secs(self.polling_interval_secs) 124 | } 125 | 126 | async fn poll(&mut self) -> Result>>, ProcessorError> { 127 | // TODO: Add metrics for gap count 128 | self.save_processor_status().await?; 129 | // Nothing should be returned 130 | Ok(None) 131 | } 132 | } 133 | 134 | impl NamedStep for VersionTrackerStep 135 | where 136 | Self: Sized + Send + 'static, 137 | T: Send + 'static, 138 | S: ProcessorStatusSaver + Send + 'static, 139 | { 140 | fn name(&self) -> String { 141 | format!("VersionTrackerStep: {}", std::any::type_name::()) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod builder; 2 | pub mod common_steps; // TODO: Feature gate this? 
3 | #[cfg(feature = "postgres_partial")] 4 | pub mod postgres; 5 | pub mod server_framework; 6 | pub mod test; 7 | #[cfg(feature = "testing_framework")] 8 | pub mod testing_framework; 9 | pub mod traits; 10 | pub mod types; 11 | pub mod utils; 12 | 13 | // Re-exporting crates to provide a cohesive SDK interface 14 | pub use aptos_indexer_transaction_stream; 15 | pub use aptos_protos; 16 | pub use bcs; 17 | pub use instrumented_channel; 18 | 19 | #[cfg(test)] 20 | mod tests { 21 | use crate::{ 22 | builder::ProcessorBuilder, 23 | common_steps::TimedBufferStep, 24 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 25 | traits::{ 26 | AsyncStep, IntoRunnableStep, NamedStep, Processable, RunnableAsyncStep, 27 | RunnableStepWithInputReceiver, 28 | }, 29 | types::transaction_context::{TransactionContext, TransactionMetadata}, 30 | utils::errors::ProcessorError, 31 | }; 32 | use anyhow::Result; 33 | use async_trait::async_trait; 34 | use instrumented_channel::instrumented_bounded_channel; 35 | use std::time::Duration; 36 | 37 | #[derive(Clone, Debug, PartialEq)] 38 | pub struct TestStruct { 39 | pub i: usize, 40 | } 41 | 42 | fn make_test_structs(num: usize) -> Vec { 43 | (1..(num + 1)).map(|i| TestStruct { i }).collect() 44 | } 45 | 46 | pub struct TestStep; 47 | 48 | impl AsyncStep for TestStep {} 49 | 50 | impl NamedStep for TestStep { 51 | fn name(&self) -> String { 52 | "TestStep".to_string() 53 | } 54 | } 55 | 56 | #[async_trait] 57 | impl Processable for TestStep { 58 | type Input = Vec; 59 | type Output = Vec; 60 | type RunType = (); 61 | 62 | async fn process( 63 | &mut self, 64 | item: TransactionContext>, 65 | ) -> Result>>, ProcessorError> { 66 | let processed = item.data.into_iter().map(|i| TestStruct { i }).collect(); 67 | Ok(Some(TransactionContext { 68 | data: processed, 69 | metadata: item.metadata, 70 | })) 71 | } 72 | } 73 | 74 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 75 | #[allow(clippy::needless_return)] 76 | async fn test_connect_two_steps() { 77 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 78 | 79 | let input_step = RunnableStepWithInputReceiver::new( 80 | input_receiver, 81 | RunnableAsyncStep::new(PassThroughStep::default()), 82 | ); 83 | 84 | // Create a timed buffer that outputs the input after 1 second 85 | let timed_buffer_step = TimedBufferStep::>::new(Duration::from_millis(200)); 86 | let first_step = timed_buffer_step; 87 | 88 | let second_step = TestStep; 89 | let second_step = RunnableAsyncStep::new(second_step); 90 | 91 | let builder = ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 92 | .connect_to(first_step.into_runnable_step(), 5) 93 | .connect_to(second_step, 3); 94 | 95 | let mut fanout_builder = builder.fanout_broadcast(2); 96 | let (_, first_output_receiver) = fanout_builder 97 | .get_processor_builder() 98 | .unwrap() 99 | .connect_to(RunnableAsyncStep::new(PassThroughStep::default()), 1) 100 | .end_and_return_output_receiver(1); 101 | 102 | let (second_builder, second_output_receiver) = fanout_builder 103 | .get_processor_builder() 104 | .unwrap() 105 | .connect_to( 106 | RunnableAsyncStep::new(PassThroughStep::new_named("MaxStep".to_string())), 107 | 2, 108 | ) 109 | .connect_to(RunnableAsyncStep::new(PassThroughStep::default()), 5) 110 | .end_and_return_output_receiver(5); 111 | 112 | let mut output_receivers = [first_output_receiver, second_output_receiver]; 113 | 114 | output_receivers.iter().for_each(|output_receiver| { 115 | 
assert_eq!(output_receiver.len(), 0, "Output should be empty"); 116 | }); 117 | 118 | let left_input = TransactionContext { 119 | data: vec![1, 2, 3], 120 | metadata: TransactionMetadata { 121 | start_version: 0, 122 | end_version: 1, 123 | start_transaction_timestamp: None, 124 | end_transaction_timestamp: None, 125 | total_size_in_bytes: 0, 126 | }, 127 | }; 128 | input_sender.send(left_input.clone()).await.unwrap(); 129 | tokio::time::sleep(Duration::from_millis(250)).await; 130 | 131 | output_receivers.iter().for_each(|output_receiver| { 132 | assert_eq!(output_receiver.len(), 1, "Output should have 1 item"); 133 | }); 134 | 135 | for output_receiver in output_receivers.iter_mut() { 136 | let result = receive_with_timeout(output_receiver, 100).await.unwrap(); 137 | 138 | assert_eq!( 139 | result.data, 140 | make_test_structs(3), 141 | "Output should be the same as input" 142 | ); 143 | } 144 | 145 | let graph = second_builder.graph; 146 | let dot = graph.dot(); 147 | println!("{dot:}"); 148 | //first_handle.abort(); 149 | //second_handle.abort(); 150 | } 151 | 152 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 153 | #[allow(clippy::needless_return)] 154 | async fn test_fanin() { 155 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 156 | 157 | let input_step = RunnableStepWithInputReceiver::new( 158 | input_receiver, 159 | RunnableAsyncStep::new(PassThroughStep::default()), 160 | ); 161 | 162 | let mut fanout_builder = 163 | ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 164 | .fanout_broadcast(2); 165 | 166 | let (first_builder, first_output_receiver) = fanout_builder 167 | .get_processor_builder() 168 | .unwrap() 169 | .connect_to( 170 | RunnableAsyncStep::new(PassThroughStep::new_named("FanoutStep1".to_string())), 171 | 5, 172 | ) 173 | .end_and_return_output_receiver(5); 174 | 175 | let (second_builder, second_output_receiver) = fanout_builder 176 | .get_processor_builder() 177 | .unwrap() 178 | .connect_to( 179 | RunnableAsyncStep::new(PassThroughStep::new_named("FanoutStep2".to_string())), 180 | 5, 181 | ) 182 | .end_and_return_output_receiver(5); 183 | 184 | let test_step = TestStep; 185 | let test_step = RunnableAsyncStep::new(test_step); 186 | 187 | let (_, mut fanin_output_receiver) = ProcessorBuilder::new_with_fanin_step_with_receivers( 188 | vec![ 189 | (first_output_receiver, first_builder.graph), 190 | (second_output_receiver, second_builder.graph), 191 | ], 192 | RunnableAsyncStep::new(PassThroughStep::new_named("FaninStep".to_string())), 193 | 3, 194 | ) 195 | .connect_to(test_step, 10) 196 | .end_and_return_output_receiver(6); 197 | 198 | assert_eq!(fanin_output_receiver.len(), 0, "Output should be empty"); 199 | 200 | let left_input = TransactionContext { 201 | data: vec![1, 2, 3], 202 | metadata: TransactionMetadata { 203 | start_version: 0, 204 | end_version: 1, 205 | start_transaction_timestamp: None, 206 | end_transaction_timestamp: None, 207 | total_size_in_bytes: 0, 208 | }, 209 | }; 210 | input_sender.send(left_input.clone()).await.unwrap(); 211 | tokio::time::sleep(Duration::from_millis(250)).await; 212 | 213 | assert_eq!(fanin_output_receiver.len(), 2, "Output should have 2 items"); 214 | 215 | for _ in 0..2 { 216 | let result = receive_with_timeout(&mut fanin_output_receiver, 100) 217 | .await 218 | .unwrap(); 219 | 220 | assert_eq!( 221 | result.data, 222 | make_test_structs(3), 223 | "Output should be the same as input" 224 | ); 225 | } 226 | 227 | let graph = fanout_builder.graph; 
228 | let dot = graph.dot(); 229 | println!("{dot:}"); 230 | //first_handle.abort(); 231 | //second_handle.abort(); 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/README.md: -------------------------------------------------------------------------------- 1 | # Postgres crate 2 | 3 | ## About 4 | This crate provides a Postgres implementation for the integration layer between the Indexer SDK and Postgres. Features included are tracking the last processed version, retrieving the start version, and validating the chain id. The key components of this crate are core schema and models, Diesel utility functions, and trait implementations. 5 | 6 | ## How to use 7 | 1. Install Postgres and Diesel CLI 8 | 2. Add the `aptos-indexer-processor-sdk` crate with the `postgres_full` feature in the `[dependencies]` section of your `Cargo.toml`: 9 | ``` 10 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}", features = ["postgres_full"] } 11 | ``` 12 | 3. Copy the `src/db` folder into where you are managing your Diesel migrations. -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/README.md: -------------------------------------------------------------------------------- 1 | # Custom processor function 2 | 3 | Utility function that lets you create a Postgres processor. It works by running the code in the `run_processor` method and applying a `process_function` to each transaction. 4 | 5 | ## How to use 6 | 1. Install Postgres and Diesel CLI 7 | 2. Add the `aptos-indexer-processor-sdk` crate with the `postgres_full` feature in the `[dependencies]` section of your `Cargo.toml`: 8 | ``` 9 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}", features = ["postgres_full"] } 10 | ``` 11 | 3. Set up Diesel and define your DB migrations. 12 | 4. In `main.rs`, call the `process` function with your indexing logic. You'll need to implement this part: 13 | ``` 14 | const MIGRATIONS: EmbeddedMigrations = embed_migrations!("/path/to/src/db/migrations"); 15 | process( 16 | "processor_name".to_string(), 17 | MIGRATIONS, 18 | async |transactions, conn_pool| { 19 | // Implement your indexing logic 20 | }, 21 | ) 22 | .await?; 23 | ``` 24 | The `process` function is an abstraction around a regular SDK processor. 25 | 26 | It runs your db migrations, validates the chain id, connects to Transaction Stream, tracks the last successful version, and processes transactions using your custom indexing logic. 27 | 28 | See [`postgres-basic-events-example`](https://github.com/aptos-labs/aptos-indexer-processor-sdk/tree/main/examples/postgres-basic-events-example) for an example on how to use this function to create a simple processor that writes events to Postgres. 29 | 30 | 5. Construct a `config.yaml` file using this example: 31 | ``` 32 | # This is a template yaml for the processor 33 | health_check_port: 8085 34 | server_config: 35 | transaction_stream_config: 36 | indexer_grpc_data_service_address: "https://grpc.mainnet.aptoslabs.com:443" 37 | auth_token: "AUTH_TOKEN" 38 | request_name_header: "PROCESSOR_NAME" 39 | starting_version: 0 40 | postgres_config: 41 | connection_string: postgresql://postgres:@localhost:5432/example 42 | ``` 43 | 6.
Run processor using this command `cargo run -p postgres-basic-events-example -- -c /path/to/config.yaml` 44 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/basic_processor_function.rs: -------------------------------------------------------------------------------- 1 | use super::basic_processor_step::BasicProcessorStep; 2 | use crate::{ 3 | aptos_indexer_transaction_stream::TransactionStreamConfig, 4 | builder::ProcessorBuilder, 5 | common_steps::{ 6 | TransactionStreamStep, VersionTrackerStep, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS, 7 | }, 8 | postgres::{ 9 | subconfigs::postgres_config::PostgresConfig, 10 | utils::{ 11 | checkpoint::{ 12 | get_starting_version, PostgresChainIdChecker, PostgresProcessorStatusSaver, 13 | }, 14 | database::{new_db_pool, run_migrations, ArcDbPool}, 15 | }, 16 | SDK_MIGRATIONS, 17 | }, 18 | server_framework::{ 19 | load, register_probes_and_metrics_handler, setup_logging, setup_panic_handler, 20 | GenericConfig, ServerArgs, 21 | }, 22 | traits::IntoRunnableStep, 23 | utils::{chain_id_check::check_or_update_chain_id, errors::ProcessorError}, 24 | }; 25 | use anyhow::Result; 26 | use aptos_protos::transaction::v1::Transaction; 27 | use clap::Parser; 28 | use diesel_migrations::EmbeddedMigrations; 29 | use serde::{Deserialize, Serialize}; 30 | use tracing::info; 31 | 32 | #[derive(Clone, Debug, Deserialize, Serialize)] 33 | #[serde(deny_unknown_fields)] 34 | pub struct ProcessConfig { 35 | pub transaction_stream_config: TransactionStreamConfig, 36 | pub postgres_config: PostgresConfig, 37 | } 38 | 39 | /// Processes transactions with a custom handler function. 40 | pub async fn process( 41 | processor_name: String, 42 | embedded_migrations: EmbeddedMigrations, 43 | process_function: F, 44 | ) -> Result<()> 45 | where 46 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + Sync + 'static, 47 | Fut: std::future::Future> + Send + 'static, 48 | { 49 | let args = ServerArgs::parse(); 50 | setup_logging(); 51 | setup_panic_handler(); 52 | let config = load::>(&args.config_path)?; 53 | let handle = tokio::runtime::Handle::current(); 54 | 55 | let health_port = config.health_check_port; 56 | let additional_labels = config.metrics_config.additional_labels.clone(); 57 | // Start liveness and readiness probes. 58 | let task_handler = handle.spawn(async move { 59 | register_probes_and_metrics_handler(health_port, additional_labels).await; 60 | anyhow::Ok(()) 61 | }); 62 | let main_task_handler = handle.spawn(async move { 63 | run_processor( 64 | processor_name, 65 | config.server_config.transaction_stream_config, 66 | config.server_config.postgres_config, 67 | embedded_migrations, 68 | process_function, 69 | ) 70 | .await 71 | }); 72 | tokio::select! 
{ 73 | res = task_handler => { 74 | res.expect("Probes and metrics handler unexpectedly exited") 75 | }, 76 | res = main_task_handler => { 77 | res.expect("Main task handler unexpectedly exited") 78 | }, 79 | } 80 | } 81 | 82 | async fn run_processor( 83 | processor_name: String, 84 | transaction_stream_config: TransactionStreamConfig, 85 | postgres_config: PostgresConfig, 86 | embedded_migrations: EmbeddedMigrations, 87 | process_function: F, 88 | ) -> Result<()> 89 | where 90 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + Sync + 'static, 91 | Fut: std::future::Future> + Send + 'static, 92 | { 93 | // Create a connection pool 94 | let db_pool = new_db_pool( 95 | &postgres_config.connection_string, 96 | Some(postgres_config.db_pool_size), 97 | ) 98 | .await 99 | .expect("Failed to create connection pool"); 100 | 101 | // Run user migrations 102 | run_migrations( 103 | postgres_config.connection_string.clone(), 104 | db_pool.clone(), 105 | embedded_migrations, 106 | ) 107 | .await; 108 | 109 | // Run SDK migrations 110 | run_migrations( 111 | postgres_config.connection_string.clone(), 112 | db_pool.clone(), 113 | SDK_MIGRATIONS, 114 | ) 115 | .await; 116 | 117 | check_or_update_chain_id( 118 | &transaction_stream_config, 119 | &PostgresChainIdChecker::new(db_pool.clone()), 120 | ) 121 | .await?; 122 | 123 | // Merge the starting version from config and the latest processed version from the DB 124 | let starting_version = get_starting_version( 125 | processor_name.as_str(), 126 | transaction_stream_config.clone(), 127 | db_pool.clone(), 128 | ) 129 | .await?; 130 | 131 | // Define processor steps 132 | let transaction_stream_config = transaction_stream_config.clone(); 133 | let transaction_stream = TransactionStreamStep::new(TransactionStreamConfig { 134 | starting_version: Some(starting_version), 135 | ..transaction_stream_config 136 | }) 137 | .await?; 138 | let basic_processor_step = BasicProcessorStep { 139 | process_function, 140 | conn_pool: db_pool.clone(), 141 | }; 142 | let processor_status_saver = 143 | PostgresProcessorStatusSaver::new(processor_name.as_str(), db_pool.clone()); 144 | let version_tracker = 145 | VersionTrackerStep::new(processor_status_saver, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS); 146 | 147 | // Connect processor steps together 148 | let (_, buffer_receiver) = 149 | ProcessorBuilder::new_with_inputless_first_step(transaction_stream.into_runnable_step()) 150 | .connect_to(basic_processor_step.into_runnable_step(), 10) 151 | .connect_to(version_tracker.into_runnable_step(), 10) 152 | .end_and_return_output_receiver(10); 153 | 154 | // (Optional) Parse the results 155 | loop { 156 | match buffer_receiver.recv().await { 157 | Ok(_) => {}, 158 | Err(_) => { 159 | info!("Channel is closed"); 160 | return Ok(()); 161 | }, 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/basic_processor_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | postgres::utils::database::ArcDbPool, 3 | traits::{AsyncRunType, AsyncStep, NamedStep, Processable}, 4 | types::transaction_context::TransactionContext, 5 | utils::errors::ProcessorError, 6 | }; 7 | use anyhow::Result; 8 | use aptos_protos::transaction::v1::Transaction; 9 | use async_trait::async_trait; 10 | 11 | // Basic process step that runs a process function on each transaction 12 | pub struct BasicProcessorStep 13 | where 14 | F: FnMut(Vec, ArcDbPool) -> 
Fut + Send + 'static, 15 | Fut: std::future::Future> + Send + 'static, 16 | { 17 | pub process_function: F, 18 | pub conn_pool: ArcDbPool, 19 | } 20 | 21 | #[async_trait] 22 | impl Processable for BasicProcessorStep 23 | where 24 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 25 | Fut: std::future::Future> + Send + 'static, 26 | { 27 | type Input = Vec; 28 | type Output = (); 29 | type RunType = AsyncRunType; 30 | 31 | async fn process( 32 | &mut self, 33 | transactions: TransactionContext>, 34 | ) -> Result>, ProcessorError> { 35 | (self.process_function)(transactions.data, self.conn_pool.clone()) 36 | .await 37 | .map_err(|e| ProcessorError::ProcessError { 38 | message: format!("Processing transactions failed: {e:?}"), 39 | })?; 40 | Ok(Some(TransactionContext { 41 | data: (), // Stub out data since it's not used in the next step 42 | metadata: transactions.metadata, 43 | })) 44 | } 45 | } 46 | 47 | impl AsyncStep for BasicProcessorStep 48 | where 49 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 50 | Fut: std::future::Future> + Send + 'static, 51 | { 52 | } 53 | 54 | impl NamedStep for BasicProcessorStep 55 | where 56 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 57 | Fut: std::future::Future> + Send + 'static, 58 | { 59 | fn name(&self) -> String { 60 | "BasicProcessorStep".to_string() 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod basic_processor_function; 2 | pub mod basic_processor_step; 3 | 4 | pub use basic_processor_function::process; 5 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/diesel.toml: -------------------------------------------------------------------------------- 1 | # For documentation on how to configure this file, 2 | # see https://diesel.rs/guides/configuring-diesel-cli 3 | 4 | [print_schema] 5 | file = "processor_metadata_schema.rs" 6 | schema = "processor_metadata" 7 | 8 | [migrations_directory] 9 | dir = "migrations" 10 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/00000000000000_diesel_initial_setup/down.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 4 | 5 | DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); 6 | DROP FUNCTION IF EXISTS diesel_set_updated_at(); 7 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/00000000000000_diesel_initial_setup/up.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations.
4 | 5 | 6 | 7 | 8 | -- Sets up a trigger for the given table to automatically set a column called 9 | -- `updated_at` whenever the row is modified (unless `updated_at` was included 10 | -- in the modified columns) 11 | -- 12 | -- # Example 13 | -- 14 | -- ```sql 15 | -- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); 16 | -- 17 | -- SELECT diesel_manage_updated_at('users'); 18 | -- ``` 19 | CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ 20 | BEGIN 21 | EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s 22 | FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); 23 | END; 24 | $$ LANGUAGE plpgsql; 25 | 26 | CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ 27 | BEGIN 28 | IF ( 29 | NEW IS DISTINCT FROM OLD AND 30 | NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at 31 | ) THEN 32 | NEW.updated_at := current_timestamp; 33 | END IF; 34 | RETURN NEW; 35 | END; 36 | $$ LANGUAGE plpgsql; 37 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/2025-03-06-201942_create_core_schema/down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS processor_metadata.processor_status; 2 | DROP TABLE IF EXISTS processor_metadata.ledger_infos; 3 | DROP SCHEMA IF EXISTS processor_metadata; -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/2025-03-06-201942_create_core_schema/up.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS processor_metadata; 2 | 3 | -- Tracks latest processed version per processor 4 | CREATE TABLE IF NOT EXISTS processor_metadata.processor_status ( 5 | processor VARCHAR(100) UNIQUE PRIMARY KEY NOT NULL, 6 | last_success_version BIGINT NOT NULL, 7 | last_updated TIMESTAMP NOT NULL DEFAULT NOW(), 8 | last_transaction_timestamp TIMESTAMP NULL 9 | ); 10 | 11 | -- Tracks chain id 12 | CREATE TABLE IF NOT EXISTS processor_metadata.ledger_infos (chain_id BIGINT UNIQUE PRIMARY KEY NOT NULL); 13 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/processor_metadata_schema.rs: -------------------------------------------------------------------------------- 1 | // @generated automatically by Diesel CLI. 2 | 3 | pub mod processor_metadata { 4 | diesel::table! { 5 | processor_metadata.ledger_infos (chain_id) { 6 | chain_id -> Int8, 7 | } 8 | } 9 | 10 | diesel::table!
{ 11 | processor_metadata.processor_status (processor) { 12 | #[max_length = 100] 13 | processor -> Varchar, 14 | last_success_version -> Int8, 15 | last_updated -> Timestamp, 16 | last_transaction_timestamp -> Nullable, 17 | } 18 | } 19 | 20 | diesel::allow_tables_to_appear_in_same_query!(ledger_infos, processor_status,); 21 | } 22 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/mod.rs: -------------------------------------------------------------------------------- 1 | use diesel_migrations::{embed_migrations, EmbeddedMigrations}; 2 | 3 | pub mod basic_processor; 4 | pub mod models; 5 | pub mod subconfigs; 6 | pub mod utils; 7 | 8 | #[path = "db/processor_metadata_schema.rs"] 9 | pub mod processor_metadata_schema; 10 | 11 | pub const SDK_MIGRATIONS: EmbeddedMigrations = embed_migrations!("./src/postgres/db/migrations"); 12 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/ledger_info.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::postgres::{ 7 | processor_metadata_schema::processor_metadata::ledger_infos, utils::database::DbPoolConnection, 8 | }; 9 | use diesel::{Identifiable, Insertable, OptionalExtension, QueryDsl, Queryable}; 10 | use diesel_async::RunQueryDsl; 11 | 12 | #[derive(Debug, Identifiable, Insertable, Queryable)] 13 | #[diesel(table_name = ledger_infos)] 14 | #[diesel(primary_key(chain_id))] 15 | pub struct LedgerInfo { 16 | pub chain_id: i64, 17 | } 18 | 19 | impl LedgerInfo { 20 | pub async fn get(conn: &mut DbPoolConnection<'_>) -> diesel::QueryResult> { 21 | ledger_infos::table 22 | .select(ledger_infos::all_columns) 23 | .first::(conn) 24 | .await 25 | .optional() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod ledger_info; 2 | pub mod processor_status; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/processor_status.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::postgres::{ 7 | processor_metadata_schema::processor_metadata::processor_status, 8 | utils::database::DbPoolConnection, 9 | }; 10 | use diesel::{AsChangeset, ExpressionMethods, Insertable, OptionalExtension, QueryDsl, Queryable}; 11 | use diesel_async::RunQueryDsl; 12 | 13 | #[derive(AsChangeset, Debug, Insertable)] 14 | #[diesel(table_name = processor_status)] 15 | /// Only tracking the latest version successfully processed 16 | pub struct ProcessorStatus { 17 | pub processor: String, 18 | pub last_success_version: i64, 19 | pub last_transaction_timestamp: Option, 20 | } 21 | 22 | #[derive(AsChangeset, Debug, Queryable)] 23 | #[diesel(table_name = processor_status)] 24 | /// Only tracking the latest version successfully processed 25 | pub struct ProcessorStatusQuery { 26 | pub processor: String, 27 | pub last_success_version: i64, 28 | pub last_updated: chrono::NaiveDateTime, 29 | pub 
last_transaction_timestamp: Option, 30 | } 31 | 32 | impl ProcessorStatusQuery { 33 | pub async fn get_by_processor( 34 | processor_name: &str, 35 | conn: &mut DbPoolConnection<'_>, 36 | ) -> diesel::QueryResult> { 37 | processor_status::table 38 | .filter(processor_status::processor.eq(processor_name)) 39 | .first::(conn) 40 | .await 41 | .optional() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/subconfigs/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod postgres_config; 2 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/subconfigs/postgres_config.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | #[derive(Clone, Debug, Deserialize, Serialize)] 4 | #[serde(deny_unknown_fields)] 5 | pub struct PostgresConfig { 6 | pub connection_string: String, 7 | // Size of the pool for writes/reads to the DB. Limits maximum number of queries in flight 8 | #[serde(default = "PostgresConfig::default_db_pool_size")] 9 | pub db_pool_size: u32, 10 | } 11 | 12 | impl PostgresConfig { 13 | pub const fn default_db_pool_size() -> u32 { 14 | 150 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/checkpoint.rs: -------------------------------------------------------------------------------- 1 | use super::database::{execute_with_better_error, execute_with_better_error_conn, ArcDbPool}; 2 | use crate::{ 3 | aptos_indexer_transaction_stream::{utils::time::parse_timestamp, TransactionStreamConfig}, 4 | common_steps::ProcessorStatusSaver, 5 | postgres::{ 6 | models::{ 7 | ledger_info::LedgerInfo, 8 | processor_status::{ProcessorStatus, ProcessorStatusQuery}, 9 | }, 10 | processor_metadata_schema::processor_metadata::{ledger_infos, processor_status}, 11 | }, 12 | types::transaction_context::TransactionContext, 13 | utils::{chain_id_check::ChainIdChecker, errors::ProcessorError}, 14 | }; 15 | use anyhow::{Context, Result}; 16 | use async_trait::async_trait; 17 | use diesel::{query_dsl::methods::FilterDsl, upsert::excluded, ExpressionMethods}; 18 | 19 | /// A trait implementation of ChainIdChecker for Postgres. 20 | pub struct PostgresChainIdChecker { 21 | pub db_pool: ArcDbPool, 22 | } 23 | 24 | impl PostgresChainIdChecker { 25 | pub fn new(db_pool: ArcDbPool) -> Self { 26 | Self { db_pool } 27 | } 28 | } 29 | 30 | #[async_trait] 31 | impl ChainIdChecker for PostgresChainIdChecker { 32 | async fn save_chain_id(&self, chain_id: u64) -> Result<()> { 33 | let mut conn = self 34 | .db_pool 35 | .get() 36 | .await 37 | .context("Error getting db connection")?; 38 | execute_with_better_error_conn( 39 | &mut conn, 40 | diesel::insert_into(ledger_infos::table) 41 | .values(LedgerInfo { 42 | chain_id: chain_id as i64, 43 | }) 44 | .on_conflict_do_nothing(), 45 | ) 46 | .await 47 | .context("Error updating chain_id!")?; 48 | Ok(()) 49 | } 50 | 51 | async fn get_chain_id(&self) -> Result> { 52 | let mut conn = self.db_pool.get().await?; 53 | let maybe_existing_chain_id = LedgerInfo::get(&mut conn) 54 | .await? 55 | .map(|li| li.chain_id as u64); 56 | Ok(maybe_existing_chain_id) 57 | } 58 | } 59 | 60 | /// A trait implementation of ProcessorStatusSaver for Postgres. 
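/// /// Illustrative usage (mirroring `basic_processor_function.rs` in this crate; the processor name and pool here are placeholders): construct it with the processor name and DB pool, then hand it to `VersionTrackerStep` so the latest processed version is checkpointed to `processor_metadata.processor_status`: /// ```ignore /// let saver = PostgresProcessorStatusSaver::new("my_processor", db_pool.clone()); /// let version_tracker = VersionTrackerStep::new(saver, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS); /// ```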
61 | pub struct PostgresProcessorStatusSaver { 62 | pub db_pool: ArcDbPool, 63 | pub processor_name: String, 64 | } 65 | 66 | impl PostgresProcessorStatusSaver { 67 | pub fn new(processor_name: &str, db_pool: ArcDbPool) -> Self { 68 | Self { 69 | db_pool, 70 | processor_name: processor_name.to_string(), 71 | } 72 | } 73 | } 74 | 75 | #[async_trait] 76 | impl ProcessorStatusSaver for PostgresProcessorStatusSaver { 77 | async fn save_processor_status( 78 | &self, 79 | last_success_batch: &TransactionContext<()>, 80 | ) -> Result<(), ProcessorError> { 81 | let last_success_version = last_success_batch.metadata.end_version as i64; 82 | let last_transaction_timestamp = last_success_batch 83 | .metadata 84 | .end_transaction_timestamp 85 | .as_ref() 86 | .map(|t| parse_timestamp(t, last_success_batch.metadata.end_version as i64)) 87 | .map(|t| t.naive_utc()); 88 | let status = ProcessorStatus { 89 | processor: self.processor_name.clone(), 90 | last_success_version, 91 | last_transaction_timestamp, 92 | }; 93 | 94 | // Save regular processor status to the database 95 | execute_with_better_error( 96 | self.db_pool.clone(), 97 | diesel::insert_into(processor_status::table) 98 | .values(&status) 99 | .on_conflict(processor_status::processor) 100 | .do_update() 101 | .set(( 102 | processor_status::last_success_version 103 | .eq(excluded(processor_status::last_success_version)), 104 | processor_status::last_updated.eq(excluded(processor_status::last_updated)), 105 | processor_status::last_transaction_timestamp 106 | .eq(excluded(processor_status::last_transaction_timestamp)), 107 | )) 108 | .filter( 109 | processor_status::last_success_version 110 | .le(excluded(processor_status::last_success_version)), 111 | ), 112 | ) 113 | .await?; 114 | Ok(()) 115 | } 116 | } 117 | 118 | pub async fn get_starting_version( 119 | processor_name: &str, 120 | transaction_stream_config: TransactionStreamConfig, 121 | conn_pool: ArcDbPool, 122 | ) -> Result { 123 | let mut conn = conn_pool.get().await?; 124 | let latest_processed_version = 125 | ProcessorStatusQuery::get_by_processor(processor_name, &mut conn) 126 | .await? 127 | .map(|ps| ps.last_success_version as u64); 128 | // If nothing checkpointed, return the `starting_version` from the config, or 0 if not set. 129 | Ok(latest_processed_version.unwrap_or(transaction_stream_config.starting_version.unwrap_or(0))) 130 | } 131 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/database.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! 
Database-related functions 5 | #![allow(clippy::extra_unused_lifetimes)] 6 | 7 | use crate::utils::{convert::remove_null_bytes, errors::ProcessorError}; 8 | use ahash::AHashMap; 9 | use diesel::{query_builder::QueryFragment, ConnectionResult, QueryResult}; 10 | use diesel_async::{ 11 | pooled_connection::{ 12 | bb8::{Pool, PooledConnection}, 13 | AsyncDieselConnectionManager, ManagerConfig, PoolError, 14 | }, 15 | AsyncPgConnection, RunQueryDsl, 16 | }; 17 | use diesel_migrations::{EmbeddedMigrations, MigrationHarness}; 18 | use futures_util::{future::BoxFuture, FutureExt}; 19 | use std::sync::Arc; 20 | use tracing::{info, warn}; 21 | 22 | pub type Backend = diesel::pg::Pg; 23 | 24 | pub type MyDbConnection = AsyncPgConnection; 25 | pub type DbPool = Pool; 26 | pub type ArcDbPool = Arc; 27 | pub type DbPoolConnection<'a> = PooledConnection<'a, MyDbConnection>; 28 | 29 | pub const DEFAULT_MAX_POOL_SIZE: u32 = 150; 30 | 31 | // the max is actually u16::MAX but we see that when the size is too big we get an overflow error so reducing it a bit 32 | pub const MAX_DIESEL_PARAM_SIZE: usize = (u16::MAX / 2) as usize; 33 | 34 | /// This function will clean the data for postgres. Currently it has support for removing 35 | /// null bytes from strings but in the future we will add more functionality. 36 | pub fn clean_data_for_db serde::Deserialize<'de>>( 37 | items: Vec, 38 | should_remove_null_bytes: bool, 39 | ) -> Vec { 40 | if should_remove_null_bytes { 41 | items.iter().map(remove_null_bytes).collect() 42 | } else { 43 | items 44 | } 45 | } 46 | 47 | fn establish_connection(database_url: &str) -> BoxFuture> { 48 | use native_tls::{Certificate, TlsConnector}; 49 | use postgres_native_tls::MakeTlsConnector; 50 | 51 | (async move { 52 | let (url, cert_path) = parse_and_clean_db_url(database_url); 53 | let cert = std::fs::read(cert_path.unwrap()).expect("Could not read certificate"); 54 | 55 | let cert = Certificate::from_pem(&cert).expect("Could not parse certificate"); 56 | let connector = TlsConnector::builder() 57 | .danger_accept_invalid_certs(true) 58 | .add_root_certificate(cert) 59 | .build() 60 | .expect("Could not build TLS connector"); 61 | let connector = MakeTlsConnector::new(connector); 62 | 63 | let (client, connection) = tokio_postgres::connect(&url, connector) 64 | .await 65 | .expect("Could not connect to database"); 66 | tokio::spawn(async move { 67 | if let Err(e) = connection.await { 68 | eprintln!("connection error: {e}"); 69 | } 70 | }); 71 | AsyncPgConnection::try_from(client).await 72 | }) 73 | .boxed() 74 | } 75 | 76 | fn parse_and_clean_db_url(url: &str) -> (String, Option) { 77 | let mut db_url = url::Url::parse(url).expect("Could not parse database url"); 78 | let mut cert_path = None; 79 | 80 | let mut query = "".to_string(); 81 | db_url.query_pairs().for_each(|(k, v)| { 82 | if k == "sslrootcert" { 83 | cert_path = Some(v.parse().unwrap()); 84 | } else { 85 | query.push_str(&format!("{k}={v}&")); 86 | } 87 | }); 88 | db_url.set_query(Some(&query)); 89 | 90 | (db_url.to_string(), cert_path) 91 | } 92 | 93 | pub async fn new_db_pool( 94 | database_url: &str, 95 | max_pool_size: Option, 96 | ) -> Result { 97 | let (_url, cert_path) = parse_and_clean_db_url(database_url); 98 | 99 | let config = if cert_path.is_some() { 100 | let mut config = ManagerConfig::::default(); 101 | config.custom_setup = Box::new(|conn| Box::pin(establish_connection(conn))); 102 | AsyncDieselConnectionManager::::new_with_config(database_url, config) 103 | } else { 104 | 
AsyncDieselConnectionManager::::new(database_url) 105 | }; 106 | let pool = Pool::builder() 107 | .max_size(max_pool_size.unwrap_or(DEFAULT_MAX_POOL_SIZE)) 108 | .build(config) 109 | .await?; 110 | Ok(Arc::new(pool)) 111 | } 112 | 113 | pub async fn execute_in_chunks( 114 | conn: ArcDbPool, 115 | build_query: fn(Vec) -> U, 116 | items_to_insert: &[T], 117 | chunk_size: usize, 118 | ) -> Result<(), ProcessorError> 119 | where 120 | U: QueryFragment + diesel::query_builder::QueryId + Send + 'static, 121 | T: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone + Send + 'static, 122 | { 123 | let tasks = items_to_insert 124 | .chunks(chunk_size) 125 | .map(|chunk| { 126 | let conn = conn.clone(); 127 | let items = chunk.to_vec(); 128 | tokio::spawn(async move { 129 | let query = build_query(items.clone()); 130 | execute_or_retry_cleaned(conn, build_query, items, query).await 131 | }) 132 | }) 133 | .collect::>(); 134 | 135 | let results = futures_util::future::try_join_all(tasks) 136 | .await 137 | .expect("Task panicked executing in chunks"); 138 | for res in results { 139 | res? 140 | } 141 | 142 | Ok(()) 143 | } 144 | 145 | /// Returns the entry for the config hashmap, or the default field count for the insert. 146 | /// 147 | /// Given diesel has a limit of how many parameters can be inserted in a single operation (u16::MAX), 148 | /// we default to chunk an array of items based on how many columns are in the table. 149 | pub fn get_config_table_chunk_size( 150 | table_name: &str, 151 | per_table_chunk_sizes: &AHashMap, 152 | ) -> usize { 153 | let chunk_size = per_table_chunk_sizes.get(table_name).copied(); 154 | chunk_size.unwrap_or_else(|| MAX_DIESEL_PARAM_SIZE / T::field_count()) 155 | } 156 | 157 | pub async fn execute_with_better_error( 158 | pool: ArcDbPool, 159 | query: U, 160 | ) -> Result 161 | where 162 | U: QueryFragment + diesel::query_builder::QueryId + Send, 163 | { 164 | let debug_string = diesel::debug_query::(&query).to_string(); 165 | let conn = &mut pool.get().await.map_err(|e| { 166 | warn!("Error getting connection from pool: {:?}", e); 167 | ProcessorError::DBStoreError { 168 | message: format!("{e:#}"), 169 | query: Some(debug_string.clone()), 170 | } 171 | })?; 172 | query 173 | .execute(conn) 174 | .await 175 | .inspect_err(|e| { 176 | warn!("Error running query: {:?}\n{:?}", e, debug_string); 177 | }) 178 | .map_err(|e| ProcessorError::DBStoreError { 179 | message: format!("{e:#}"), 180 | query: Some(debug_string), 181 | }) 182 | } 183 | 184 | pub async fn execute_with_better_error_conn( 185 | conn: &mut MyDbConnection, 186 | query: U, 187 | ) -> QueryResult 188 | where 189 | U: QueryFragment + diesel::query_builder::QueryId + Send, 190 | { 191 | let debug_string = diesel::debug_query::(&query).to_string(); 192 | tracing::debug!("Executing query: {:?}", debug_string); 193 | let res = query.execute(conn).await; 194 | if let Err(ref e) = res { 195 | tracing::warn!("Error running query: {:?}\n{:?}", e, debug_string); 196 | } 197 | res 198 | } 199 | 200 | async fn execute_or_retry_cleaned( 201 | conn: ArcDbPool, 202 | build_query: fn(Vec) -> U, 203 | items: Vec, 204 | query: U, 205 | ) -> Result<(), ProcessorError> 206 | where 207 | U: QueryFragment + diesel::query_builder::QueryId + Send, 208 | T: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone, 209 | { 210 | match execute_with_better_error(conn.clone(), query).await { 211 | Ok(_) => {}, 212 | Err(_) => { 213 | let cleaned_items = clean_data_for_db(items, true); 214 | let cleaned_query = 
build_query(cleaned_items); 215 | match execute_with_better_error(conn.clone(), cleaned_query).await { 216 | Ok(_) => {}, 217 | Err(e) => { 218 | return Err(e); 219 | }, 220 | } 221 | }, 222 | } 223 | Ok(()) 224 | } 225 | 226 | pub fn run_pending_migrations( 227 | conn: &mut impl MigrationHarness, 228 | migrations: EmbeddedMigrations, 229 | ) { 230 | conn.run_pending_migrations(migrations) 231 | .expect("[Parser] Migrations failed!"); 232 | } 233 | 234 | // For the normal processor build we just use standard Diesel with the postgres 235 | // feature enabled (which uses libpq under the hood, hence why we named the feature 236 | // this way). 237 | #[cfg(feature = "postgres_full")] 238 | pub async fn run_migrations( 239 | postgres_connection_string: String, 240 | _conn_pool: ArcDbPool, 241 | migrations: EmbeddedMigrations, 242 | ) { 243 | use diesel::{Connection, PgConnection}; 244 | 245 | info!("Running migrations: {:?}", postgres_connection_string); 246 | let migration_time = std::time::Instant::now(); 247 | let mut conn = 248 | PgConnection::establish(&postgres_connection_string).expect("migrations failed!"); 249 | run_pending_migrations(&mut conn, migrations); 250 | info!( 251 | duration_in_secs = migration_time.elapsed().as_secs_f64(), 252 | "[Parser] Finished migrations" 253 | ); 254 | } 255 | 256 | // If the postgres_full feature isn't enabled, we use diesel async instead. This is used by 257 | // the CLI for the local testnet, where we cannot tolerate the libpq dependency. 258 | #[cfg(not(feature = "postgres_full"))] 259 | pub async fn run_migrations( 260 | postgres_connection_string: String, 261 | conn_pool: ArcDbPool, 262 | migrations: EmbeddedMigrations, 263 | ) { 264 | use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; 265 | 266 | info!("Running migrations: {:?}", postgres_connection_string); 267 | let conn = conn_pool 268 | // We need to use this since AsyncConnectionWrapper doesn't know how to 269 | // work with a pooled connection. 270 | .dedicated_connection() 271 | .await 272 | .expect("[Parser] Failed to get connection"); 273 | // We use spawn_blocking since run_pending_migrations is a blocking function. 274 | tokio::task::spawn_blocking(move || { 275 | // This lets us use the connection like a normal diesel connection. 
See more: 276 | // https://docs.rs/diesel-async/latest/diesel_async/async_connection_wrapper/type.AsyncConnectionWrapper.html 277 | let mut conn: AsyncConnectionWrapper = 278 | AsyncConnectionWrapper::from(conn); 279 | run_pending_migrations(&mut conn, migrations); 280 | }) 281 | .await 282 | .expect("[Parser] Failed to run migrations"); 283 | } 284 | 285 | pub struct DbContext<'a> { 286 | pub conn: DbPoolConnection<'a>, 287 | pub query_retries: u32, 288 | pub query_retry_delay_ms: u64, 289 | } 290 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod checkpoint; 2 | pub mod database; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/server_framework.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | 3 | use crate::{ 4 | instrumented_channel::channel_metrics::init_channel_metrics_registry, 5 | utils::step_metrics::init_step_metrics_registry, 6 | }; 7 | use anyhow::{Context, Result}; 8 | #[cfg(target_os = "linux")] 9 | use aptos_system_utils::profiling::start_cpu_profiling; 10 | use autometrics::settings::AutometricsSettings; 11 | use axum::{http::StatusCode, response::IntoResponse, routing::get, Router}; 12 | use backtrace::Backtrace; 13 | use clap::Parser; 14 | use prometheus_client::registry::Registry; 15 | use serde::{de::DeserializeOwned, Deserialize, Serialize}; 16 | // TODO: remove deprecated lint when new clippy nightly is released 17 | #[allow(deprecated)] 18 | use std::{fs::File, io::Read, panic::PanicInfo, path::PathBuf, process}; 19 | use tokio::runtime::Handle; 20 | use tracing::error; 21 | use tracing_subscriber::EnvFilter; 22 | 23 | /// ServerArgs bootstraps a server with all common pieces. And then triggers the run method for 24 | /// the specific service. 25 | #[derive(Parser)] 26 | pub struct ServerArgs { 27 | #[clap(short, long, value_parser)] 28 | pub config_path: PathBuf, 29 | } 30 | 31 | impl ServerArgs { 32 | pub async fn run(&self, handle: Handle) -> Result<()> 33 | where 34 | C: RunnableConfig, 35 | { 36 | // Set up the server. 37 | setup_logging(); 38 | setup_panic_handler(); 39 | let config = load::>(&self.config_path)?; 40 | run_server_with_config(config, handle).await 41 | } 42 | } 43 | 44 | /// Run a server and the necessary probes. For spawning these tasks, the user must 45 | /// provide a handle to a runtime they already have. 46 | pub async fn run_server_with_config(config: GenericConfig, handle: Handle) -> Result<()> 47 | where 48 | C: RunnableConfig, 49 | { 50 | let health_port = config.health_check_port; 51 | let additional_labels = config.metrics_config.additional_labels.clone(); 52 | // Start liveness and readiness probes. 53 | let task_handler = handle.spawn(async move { 54 | register_probes_and_metrics_handler(health_port, additional_labels).await; 55 | anyhow::Ok(()) 56 | }); 57 | let main_task_handler = handle.spawn(async move { config.run().await }); 58 | tokio::select! { 59 | res = task_handler => { 60 | res.expect("Probes and metrics handler unexpectedly exited") 61 | }, 62 | res = main_task_handler => { 63 | res.expect("Main task handler unexpectedly exited") 64 | }, 65 | } 66 | } 67 | 68 | #[derive(Deserialize, Debug, Serialize)] 69 | pub struct GenericConfig { 70 | // Shared configuration among all services. 
71 | pub health_check_port: u16, 72 | 73 | #[serde(default)] 74 | pub metrics_config: MetricsConfig, 75 | 76 | // Specific configuration for each service. 77 | pub server_config: T, 78 | } 79 | 80 | #[derive(Clone, Deserialize, Debug, Default, Serialize)] 81 | pub struct MetricsConfig { 82 | /// Additional labels to use for metrics. 83 | pub additional_labels: Vec<(String, String)>, 84 | } 85 | 86 | #[async_trait::async_trait] 87 | impl RunnableConfig for GenericConfig 88 | where 89 | T: RunnableConfig, 90 | { 91 | async fn run(&self) -> Result<()> { 92 | self.server_config.run().await 93 | } 94 | 95 | fn get_server_name(&self) -> String { 96 | self.server_config.get_server_name() 97 | } 98 | } 99 | 100 | /// RunnableConfig is a trait that all services must implement for their configuration. 101 | #[async_trait::async_trait] 102 | pub trait RunnableConfig: DeserializeOwned + Send + Sync + 'static { 103 | async fn run(&self) -> Result<()>; 104 | fn get_server_name(&self) -> String; 105 | } 106 | 107 | /// Parse a yaml file into a struct. 108 | pub fn load Deserialize<'de>>(path: &PathBuf) -> Result { 109 | let mut file = 110 | File::open(path).with_context(|| format!("failed to open the file at path: {path:?}",))?; 111 | let mut contents = String::new(); 112 | file.read_to_string(&mut contents) 113 | .with_context(|| format!("failed to read the file at path: {path:?}",))?; 114 | serde_yaml::from_str::(&contents).context("Unable to parse yaml file") 115 | } 116 | 117 | #[derive(Debug, Serialize)] 118 | pub struct CrashInfo { 119 | details: String, 120 | backtrace: String, 121 | } 122 | 123 | /// Invoke to ensure process exits on a thread panic. 124 | /// 125 | /// Tokio's default behavior is to catch panics and ignore them. Invoking this function will 126 | /// ensure that all subsequent thread panics (even Tokio threads) will report the 127 | /// details/backtrace and then exit. 128 | pub fn setup_panic_handler() { 129 | // TODO: remove deprecated lint when new clippy nightly is released 130 | #[allow(deprecated)] 131 | std::panic::set_hook(Box::new(move |pi: &PanicInfo<'_>| { 132 | handle_panic(pi); 133 | })); 134 | } 135 | 136 | // Formats and logs panic information 137 | // TODO: remove deprecated lint when new clippy nightly is released 138 | #[allow(deprecated)] 139 | fn handle_panic(panic_info: &PanicInfo<'_>) { 140 | // The Display formatter for a PanicInfo contains the message, payload and location. 141 | let details = format!("{panic_info}",); 142 | let backtrace = format!("{:#?}", Backtrace::new()); 143 | let info = CrashInfo { details, backtrace }; 144 | let crash_info = toml::to_string_pretty(&info).unwrap(); 145 | error!("{}", crash_info); 146 | // TODO / HACK ALARM: Write crash info synchronously via eprintln! to ensure it is written before the process exits which error! doesn't guarantee. 147 | // This is a workaround until https://github.com/aptos-labs/aptos-core/issues/2038 is resolved. 148 | eprintln!("{crash_info}",); 149 | // Kill the process 150 | process::exit(12); 151 | } 152 | 153 | /// Set up logging for the server. 
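(Editorial aside, an illustrative sketch rather than code from this repo: the pieces above, `ServerArgs`, `GenericConfig`, `RunnableConfig`, and `load`, are typically wired together from a service's `main`. `MyProcessorConfig` is a hypothetical config type and the crate path in the import is an assumption.)

```rust
use anyhow::Result;
// Crate path assumed; adjust to however the SDK is consumed.
use aptos_indexer_processor_sdk::server_framework::{RunnableConfig, ServerArgs};
use clap::Parser;
use serde::{Deserialize, Serialize};

// Hypothetical service-specific configuration; it becomes `server_config` in the YAML.
#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
struct MyProcessorConfig {
    postgres_connection_string: String,
}

#[async_trait::async_trait]
impl RunnableConfig for MyProcessorConfig {
    async fn run(&self) -> Result<()> {
        // Build and run the processor pipeline here.
        Ok(())
    }

    fn get_server_name(&self) -> String {
        "my_processor".to_string()
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let args = ServerArgs::parse();
    // `run` loads the YAML at --config-path into GenericConfig<MyProcessorConfig>,
    // starts the health/metrics endpoints, then calls MyProcessorConfig::run.
    args.run::<MyProcessorConfig>(tokio::runtime::Handle::current())
        .await
}
```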
154 | pub fn setup_logging() { 155 | let env_filter = EnvFilter::try_from_default_env() 156 | .or_else(|_| EnvFilter::try_new("info")) 157 | .unwrap(); 158 | tracing_subscriber::fmt() 159 | .json() 160 | .with_file(true) 161 | .with_line_number(true) 162 | .with_thread_ids(true) 163 | .with_target(false) 164 | .with_thread_names(true) 165 | .with_env_filter(env_filter) 166 | .flatten_event(true) 167 | .init(); 168 | } 169 | 170 | /// Register readiness and liveness probes and set up metrics endpoint. 171 | pub async fn register_probes_and_metrics_handler( 172 | port: u16, 173 | additional_labels: Vec<(String, String)>, 174 | ) { 175 | let mut registry = Registry::with_labels( 176 | additional_labels 177 | .into_iter() 178 | .map(|(k, v)| (k.into(), v.into())), 179 | ); 180 | init_step_metrics_registry(&mut registry); 181 | init_channel_metrics_registry(&mut registry); 182 | AutometricsSettings::builder() 183 | .prometheus_client_registry(registry) 184 | .init(); 185 | 186 | let router = Router::new() 187 | .route("/readiness", get(StatusCode::OK)) 188 | .route("/metrics", get(metrics_handler)); 189 | 190 | #[cfg(target_os = "linux")] 191 | let router = router.merge(Router::new().route("/profilez", get(profilez_handler))); 192 | 193 | let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{port}",)) 194 | .await 195 | .expect("Failed to bind TCP listener"); 196 | axum::serve(listener, router).await.unwrap(); 197 | } 198 | 199 | async fn metrics_handler() -> impl IntoResponse { 200 | match autometrics::prometheus_exporter::encode_to_string() { 201 | Ok(prometheus_client_rust_metrics) => ( 202 | StatusCode::OK, 203 | [("Content-Type", "text/plain; version=0.0.4")], 204 | prometheus_client_rust_metrics, 205 | ) 206 | .into_response(), 207 | Err(err) => (StatusCode::INTERNAL_SERVER_ERROR, format!("{err:?}",)).into_response(), 208 | } 209 | } 210 | 211 | #[cfg(target_os = "linux")] 212 | async fn profilez_handler() -> impl IntoResponse { 213 | match start_cpu_profiling(10, 99, false).await { 214 | Ok(body) => ( 215 | StatusCode::OK, 216 | [ 217 | ("Content-Length", body.len().to_string()), 218 | ("Content-Disposition", "inline".to_string()), 219 | ("Content-Type", "image/svg+xml".to_string()), 220 | ], 221 | body, 222 | ) 223 | .into_response(), 224 | Err(e) => ( 225 | StatusCode::INTERNAL_SERVER_ERROR, 226 | format!("Profiling failed: {e:?}."), 227 | ) 228 | .into_response(), 229 | } 230 | } 231 | 232 | #[cfg(test)] 233 | mod tests { 234 | use super::*; 235 | use std::io::Write; 236 | use tempfile::tempdir; 237 | 238 | #[derive(Clone, Debug, Deserialize, Serialize)] 239 | #[serde(deny_unknown_fields)] 240 | pub struct TestConfig { 241 | test: u32, 242 | test_name: String, 243 | } 244 | 245 | #[async_trait::async_trait] 246 | impl RunnableConfig for TestConfig { 247 | async fn run(&self) -> Result<()> { 248 | assert_eq!(self.test, 123); 249 | assert_eq!(self.test_name, "test"); 250 | Ok(()) 251 | } 252 | 253 | fn get_server_name(&self) -> String { 254 | self.test_name.clone() 255 | } 256 | } 257 | 258 | #[test] 259 | fn test_random_config_creation() { 260 | let dir = tempdir().expect("tempdir failure"); 261 | 262 | let file_path = dir.path().join("testing_yaml.yaml"); 263 | let mut file = File::create(&file_path).expect("create failure"); 264 | let raw_yaml_content = r#" 265 | health_check_port: 12345 266 | server_config: 267 | test: 123 268 | test_name: "test" 269 | "#; 270 | writeln!(file, "{raw_yaml_content}").expect("write_all failure"); 271 | 272 | let config = 
load::>(&file_path).unwrap(); 273 | assert_eq!(config.health_check_port, 12345); 274 | assert_eq!(config.server_config.test, 123); 275 | assert_eq!(config.server_config.test_name, "test"); 276 | } 277 | 278 | #[test] 279 | fn verify_tool() { 280 | use clap::CommandFactory; 281 | ServerArgs::command().debug_assert() 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod steps; 2 | pub mod utils; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/steps/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod pass_through_step; 2 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/steps/pass_through_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{async_step::AsyncRunType, AsyncStep, NamedStep, Processable}, 3 | types::transaction_context::TransactionContext, 4 | utils::errors::ProcessorError, 5 | }; 6 | use anyhow::Result; 7 | use async_trait::async_trait; 8 | use std::marker::PhantomData; 9 | 10 | pub struct PassThroughStep { 11 | name: Option, 12 | _input: PhantomData, 13 | } 14 | 15 | impl Default for PassThroughStep { 16 | fn default() -> Self { 17 | Self { 18 | name: None, 19 | _input: PhantomData, 20 | } 21 | } 22 | } 23 | 24 | impl PassThroughStep { 25 | pub fn new_named(name: String) -> Self { 26 | Self { 27 | name: Some(name), 28 | _input: PhantomData, 29 | } 30 | } 31 | } 32 | 33 | impl AsyncStep for PassThroughStep {} 34 | 35 | impl NamedStep for PassThroughStep { 36 | fn name(&self) -> String { 37 | self.name 38 | .clone() 39 | .unwrap_or_else(|| "PassThroughStep".to_string()) 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Processable for PassThroughStep { 45 | type Input = Input; 46 | type Output = Input; 47 | type RunType = AsyncRunType; 48 | 49 | async fn process( 50 | &mut self, 51 | item: TransactionContext, 52 | ) -> Result>, ProcessorError> { 53 | Ok(Some(item)) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/utils.rs: -------------------------------------------------------------------------------- 1 | use instrumented_channel::InstrumentedAsyncReceiver; 2 | use std::time::Duration; 3 | 4 | pub async fn receive_with_timeout( 5 | receiver: &mut InstrumentedAsyncReceiver, 6 | timeout_ms: u64, 7 | ) -> Option { 8 | tokio::time::timeout(Duration::from_millis(timeout_ms), async { 9 | receiver.recv().await 10 | }) 11 | .await 12 | .unwrap() 13 | .ok() 14 | } 15 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/cli_parser.rs: -------------------------------------------------------------------------------- 1 | use once_cell::sync::Lazy; 2 | use std::sync::Mutex; 3 | 4 | #[derive(Debug, Clone)] 5 | pub struct TestArgs { 6 | pub generate_output: bool, 7 | pub output_path: Option, 8 | } 9 | 10 | // Define a global static to store the parsed arguments 11 | static TEST_CONFIG: Lazy> = Lazy::new(|| { 12 | let args = parse_test_args(); 13 | Mutex::new(args) 14 | }); 15 | 16 | // Function to fetch global test args 17 | pub fn get_test_config() -> (bool, 
Option) { 18 | let test_args = TEST_CONFIG.lock().unwrap().clone(); 19 | (test_args.generate_output, test_args.output_path) 20 | } 21 | 22 | pub fn parse_test_args() -> TestArgs { 23 | let raw_args: Vec = std::env::args().collect(); 24 | 25 | // Find the "--" separator, or default to include all args after the test name 26 | let clap_args_position = raw_args.iter().position(|arg| arg == "--"); 27 | 28 | // Determine the starting position for custom arguments 29 | let custom_args_start = match clap_args_position { 30 | Some(position) => position + 1, // Start after the "--" if it exists 31 | None => 1, // Start after the test name, skip the first element 32 | }; 33 | 34 | // Collect custom arguments based on determined start position 35 | let custom_args: Vec = raw_args[custom_args_start..].to_vec(); 36 | 37 | // Manually parse the "generate" flag 38 | let generate_flag = custom_args.contains(&"generate".to_string()); 39 | 40 | // Manually parse the "--output-path" flag and get its associated value 41 | let output_path = custom_args 42 | .windows(2) 43 | .find(|args| args[0] == "output-path") 44 | .map(|args| args[1].clone()); 45 | 46 | println!("Parsed generate flag: {generate_flag}",); 47 | println!( 48 | "Parsed output_path: {}", 49 | output_path.clone().unwrap_or_else(|| "None".to_string()) 50 | ); 51 | 52 | TestArgs { 53 | generate_output: generate_flag, 54 | output_path, 55 | } 56 | } 57 | 58 | #[cfg(test)] 59 | mod tests { 60 | use super::*; 61 | pub fn parse_test_args_from_vec(args: Vec) -> TestArgs { 62 | // Find the "--" separator (if it exists) 63 | let clap_args_position = args.iter().position(|arg| arg == "--"); 64 | 65 | // Only pass the arguments that come after "--", if it exists 66 | let custom_args: Vec = match clap_args_position { 67 | Some(position) => args[position + 1..].to_vec(), // Slice after `--` 68 | None => Vec::new(), // If no `--` is found, treat as no custom args 69 | }; 70 | 71 | // Manually parse the "--generate" flag 72 | let generate_output_flag = custom_args.contains(&"generate".to_string()); 73 | 74 | // Manually parse the "--output-path" flag and get its associated value 75 | let output_path = custom_args 76 | .windows(2) 77 | .find(|args| args[0] == "output-path") 78 | .map(|args| args[1].clone()); 79 | 80 | println!("Parsed generate_output_flag: {generate_output_flag}"); 81 | println!( 82 | "Parsed output_path: {}", 83 | output_path.clone().unwrap_or_else(|| "None".to_string()) 84 | ); 85 | 86 | TestArgs { 87 | generate_output: generate_output_flag, 88 | output_path, 89 | } 90 | } 91 | 92 | #[test] 93 | fn test_parse_generate_output_flag() { 94 | let args = vec![ 95 | "test_binary".to_string(), 96 | "--".to_string(), 97 | "generate".to_string(), 98 | ]; 99 | let parsed = parse_test_args_from_vec(args); 100 | assert!(parsed.generate_output); 101 | assert_eq!(parsed.output_path, None); 102 | } 103 | 104 | #[test] 105 | fn test_parse_output_path() { 106 | let args = vec![ 107 | "test_binary".to_string(), 108 | "--".to_string(), 109 | "output-path".to_string(), 110 | "/some/path".to_string(), 111 | ]; 112 | let parsed = parse_test_args_from_vec(args); 113 | assert!(!parsed.generate_output); 114 | assert_eq!(parsed.output_path, Some("/some/path".to_string())); 115 | } 116 | 117 | #[test] 118 | fn test_parse_both_arguments() { 119 | let args = vec![ 120 | "test_binary".to_string(), 121 | "--".to_string(), 122 | "generate".to_string(), 123 | "output-path".to_string(), 124 | "/some/other/path".to_string(), 125 | ]; 126 | let parsed = 
parse_test_args_from_vec(args); 127 | assert!(parsed.generate_output); 128 | assert_eq!(parsed.output_path, Some("/some/other/path".to_string())); 129 | } 130 | 131 | #[test] 132 | fn test_parse_no_arguments() { 133 | let args = vec!["test_binary".to_string()]; 134 | let parsed = parse_test_args_from_vec(args); 135 | assert!(!parsed.generate_output); 136 | assert_eq!(parsed.output_path, None); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/database.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use async_trait::async_trait; 3 | use testcontainers::{ 4 | core::{IntoContainerPort, WaitFor}, 5 | runners::AsyncRunner, 6 | ContainerAsync, GenericImage, ImageExt, 7 | }; 8 | 9 | const POSTGRES_IMAGE: &str = "postgres"; 10 | const POSTGRES_VERSION: &str = "14"; 11 | const POSTGRES_PORT: u16 = 5432; 12 | const POSTGRES_DB: &str = "postgres"; 13 | const POSTGRES_USER: &str = "postgres"; 14 | const POSTGRES_PASSWORD: &str = "postgres"; 15 | 16 | #[async_trait] 17 | pub trait TestDatabase: Send + Sync { 18 | /// Set up the test container using user-defined code. 19 | async fn setup<'a>(&'a mut self) -> anyhow::Result<()>; 20 | 21 | /// Retrieve the database connection URL after setup. 22 | fn get_db_url(&self) -> String; 23 | } 24 | 25 | #[derive(Default)] 26 | pub struct PostgresTestDatabase { 27 | connection_string: String, 28 | postgres_container: Option>, 29 | } 30 | 31 | impl PostgresTestDatabase { 32 | pub fn new() -> Self { 33 | PostgresTestDatabase { 34 | postgres_container: None, 35 | connection_string: String::new(), 36 | } 37 | } 38 | 39 | /// Helper method to configure and start the Postgres container. 40 | async fn start_postgres_container(&mut self) -> Result> { 41 | let postgres_image = GenericImage::new(POSTGRES_IMAGE, POSTGRES_VERSION) 42 | .with_exposed_port(POSTGRES_PORT.tcp()) 43 | .with_wait_for(WaitFor::message_on_stderr( 44 | "database system is ready to accept connections", 45 | )) 46 | .with_env_var("POSTGRES_DB", POSTGRES_DB) 47 | .with_env_var("POSTGRES_USER", POSTGRES_USER) 48 | .with_env_var("POSTGRES_PASSWORD", POSTGRES_PASSWORD); 49 | 50 | let container = postgres_image 51 | .start() 52 | .await 53 | .context("Failed to start Postgres container")?; 54 | 55 | Ok(container) 56 | } 57 | 58 | /// Helper method to get the host and port information of the running container. 59 | async fn get_connection_info(&self) -> Result<(String, u16)> { 60 | let host = self 61 | .postgres_container 62 | .as_ref() 63 | .context("Postgres container not initialized")? 64 | .get_host() 65 | .await 66 | .context("Failed to get container host")?; 67 | 68 | let port = self 69 | .postgres_container 70 | .as_ref() 71 | .context("Postgres container not initialized")? 72 | .get_host_port_ipv4(5432) 73 | .await 74 | .context("Failed to get container port")?; 75 | 76 | Ok((host.to_string(), port)) 77 | } 78 | } 79 | 80 | #[async_trait] 81 | impl TestDatabase for PostgresTestDatabase { 82 | /// Set up the Postgres container and get the database connection URL. 
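(Editorial aside: a hedged sketch, not a test from this repo, of how a test might drive `PostgresTestDatabase`; the import path is an assumption.)

```rust
// Path assumed; `TestDatabase` must be in scope for `setup`.
use aptos_indexer_processor_sdk::testing_framework::database::{PostgresTestDatabase, TestDatabase};

#[tokio::test]
async fn gives_a_throwaway_postgres_url() -> anyhow::Result<()> {
    let mut db = PostgresTestDatabase::new();
    // Starts the postgres:14 testcontainer and waits for it to accept connections.
    db.setup().await?;
    // e.g. postgres://postgres:postgres@<host>:<port>/postgres
    let url = db.get_db_url();
    assert!(url.starts_with("postgres://"));
    Ok(())
}
```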
83 | async fn setup(&mut self) -> Result<()> { 84 | self.postgres_container = Some(self.start_postgres_container().await?); 85 | 86 | let (host, port) = self.get_connection_info().await?; 87 | 88 | self.connection_string = format!("postgres://postgres:postgres@{host}:{port}/postgres"); 89 | Ok(()) 90 | } 91 | 92 | /// Retrieve the Postgres connection URL after the container has been set up. 93 | fn get_db_url(&self) -> String { 94 | self.connection_string.clone() 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/mock_grpc.rs: -------------------------------------------------------------------------------- 1 | use aptos_protos::indexer::v1::{ 2 | raw_data_server::{RawData, RawDataServer}, 3 | GetTransactionsRequest, ProcessedRange, TransactionsResponse, 4 | }; 5 | use futures::Stream; 6 | use std::{collections::HashMap, pin::Pin}; 7 | use tokio::time::{timeout, Duration}; 8 | use tokio_stream::wrappers::TcpListenerStream; 9 | use tonic::{transport::Server, Request, Response, Status}; 10 | 11 | // Bind to port 0 to get a random available port 12 | const GRPC_ADDRESS: &str = "127.0.0.1:0"; 13 | 14 | #[derive(Default)] 15 | pub struct MockGrpcServer { 16 | pub transactions_response: Vec, 17 | pub chain_id: u64, 18 | } 19 | 20 | type ResponseStream = Pin> + Send>>; 21 | 22 | #[tonic::async_trait] 23 | impl RawData for MockGrpcServer { 24 | type GetTransactionsStream = ResponseStream; 25 | 26 | async fn get_transactions( 27 | &self, 28 | req: Request, 29 | ) -> Result, Status> { 30 | let request = req.into_inner(); 31 | let starting_version = request.starting_version.unwrap_or(0); // Default to 0 if starting_version is not provided 32 | let transactions_count = request.transactions_count.unwrap_or(1); // Default to 1 if transactions_count is not provided 33 | let mut collected_transactions = Vec::new(); 34 | 35 | let mut transaction_map = HashMap::new(); 36 | for transaction_response in &self.transactions_response { 37 | for tx in &transaction_response.transactions { 38 | transaction_map.insert(tx.version, tx.clone()); 39 | } 40 | } 41 | 42 | let mut sorted_transactions: Vec<_> = transaction_map 43 | .iter() 44 | .filter(|(&version, _)| version >= starting_version) 45 | .map(|(_, tx)| tx.clone()) 46 | .collect(); 47 | sorted_transactions.sort_by_key(|tx| tx.version); 48 | 49 | collected_transactions.extend( 50 | sorted_transactions 51 | .into_iter() 52 | .take(transactions_count as usize), 53 | ); 54 | 55 | let result = if !collected_transactions.is_empty() { 56 | TransactionsResponse { 57 | transactions: collected_transactions, 58 | chain_id: Some(self.chain_id), 59 | processed_range: Some(ProcessedRange { 60 | first_version: starting_version, 61 | last_version: starting_version + transactions_count - 1, 62 | }), 63 | } 64 | } else { 65 | // Return a default response with chain_id if no transactions are found 66 | let mut default_transaction_response = self.transactions_response[0].clone(); 67 | default_transaction_response.chain_id = Some(self.chain_id); 68 | default_transaction_response 69 | }; 70 | 71 | let stream = futures::stream::iter(vec![Ok(result)]); 72 | Ok(Response::new(Box::pin(stream))) 73 | } 74 | } 75 | 76 | impl MockGrpcServer { 77 | pub async fn run(self) -> anyhow::Result { 78 | let listener = tokio::net::TcpListener::bind(GRPC_ADDRESS).await?; 79 | let bound_addr = listener.local_addr()?; // Get the actual bound address 80 | 81 | // Convert the TcpListener into a 
TcpListenerStream (wrapping it with `?` to handle potential errors) 82 | let stream = TcpListenerStream::new(listener); 83 | 84 | // Build and start the gRPC server without graceful shutdown 85 | let server = Server::builder().add_service( 86 | RawDataServer::new(self) 87 | .accept_compressed(tonic::codec::CompressionEncoding::Zstd) // Enable compression for incoming requests 88 | .send_compressed(tonic::codec::CompressionEncoding::Zstd), // Compress outgoing responses 89 | ); 90 | 91 | tokio::spawn(async move { 92 | // This server will run until the process is killed or the task is stopped 93 | let server_timeout = Duration::from_secs(60); 94 | 95 | match timeout(server_timeout, server.serve_with_incoming(stream)).await { 96 | Ok(result) => match result { 97 | Ok(_) => { 98 | println!("Server stopped successfully."); 99 | }, 100 | Err(e) => { 101 | eprintln!("Failed to run gRPC server: {e:?}"); 102 | }, 103 | }, 104 | Err(_) => { 105 | eprintln!("Server timed out and was stopped."); 106 | }, 107 | } 108 | }); 109 | 110 | // Return the port number so it can be used by other parts of the program 111 | let port = bound_addr.port(); 112 | println!("Server is running on port {port}",); 113 | 114 | Ok(port) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cli_parser; 2 | pub mod database; 3 | mod mock_grpc; 4 | pub mod sdk_test_context; 5 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/README.md: -------------------------------------------------------------------------------- 1 | # Traits 2 | 3 | ## Async Step 4 | 5 | The `async_step.rs` file provides tools for handling asynchronous steps in processing. 6 | 7 | Implement `AsyncStep` for steps that process data directly without buffering. 8 | 9 | ## Pollable Async Step 10 | 11 | The `pollable_async_step.rs` file provides tools for handling steps that can be polled asynchronously. 12 | 13 | Implement `PollableAsyncStep` for steps that buffer or poll data over a duration of time in an asynchronous manner. 14 | 15 | ## Processable 16 | The `processable.rs` file defines the `Processable` trait, which each step implements. 17 | 18 | ## Processor trait 19 | The `processor_trait.rs` defines `ProcessorTrait`, which each processor implements. 
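As an illustrative, non-normative sketch, a minimal `AsyncStep` ties these traits together as shown below. `UppercaseStep` is a made-up step, and the import paths assume the SDK exposes its `traits`, `types`, and `utils` modules under the crate root.

```rust
use aptos_indexer_processor_sdk::{
    traits::{async_step::AsyncRunType, AsyncStep, NamedStep, Processable},
    types::transaction_context::TransactionContext,
    utils::errors::ProcessorError,
};
use async_trait::async_trait;

pub struct UppercaseStep;

#[async_trait]
impl Processable for UppercaseStep {
    type Input = String;
    type Output = String;
    type RunType = AsyncRunType;

    async fn process(
        &mut self,
        input: TransactionContext<String>,
    ) -> Result<Option<TransactionContext<String>>, ProcessorError> {
        // Transform the batch's data and pass the metadata through unchanged.
        Ok(Some(TransactionContext {
            data: input.data.to_uppercase(),
            metadata: input.metadata,
        }))
    }
}

// AsyncStep is a marker trait: the step handles each batch immediately, with no polling or buffering.
impl AsyncStep for UppercaseStep {}

impl NamedStep for UppercaseStep {
    fn name(&self) -> String {
        "UppercaseStep".to_string()
    }
}
```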
20 | 21 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/async_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | processable::RunnableStepType, IntoRunnableStep, NamedStep, Processable, RunnableStep, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::step_metrics::{StepMetricLabels, StepMetricsBuilder}, 7 | }; 8 | use async_trait::async_trait; 9 | use bigdecimal::Zero; 10 | use instrumented_channel::{ 11 | instrumented_bounded_channel, InstrumentedAsyncReceiver, InstrumentedAsyncSender, 12 | }; 13 | use std::time::{Duration, Instant}; 14 | use tokio::task::JoinHandle; 15 | use tracing::{error, info, warn}; 16 | 17 | #[async_trait] 18 | pub trait AsyncStep 19 | where 20 | Self: Processable + Send + Sized + 'static, 21 | { 22 | } 23 | 24 | pub struct AsyncRunType; 25 | 26 | impl RunnableStepType for AsyncRunType {} 27 | 28 | pub struct RunnableAsyncStep 29 | where 30 | Step: AsyncStep, 31 | { 32 | pub step: Step, 33 | } 34 | 35 | impl RunnableAsyncStep 36 | where 37 | Step: AsyncStep, 38 | { 39 | pub fn new(step: Step) -> Self { 40 | Self { step } 41 | } 42 | } 43 | 44 | impl NamedStep for RunnableAsyncStep 45 | where 46 | Step: 'static + AsyncStep + Send + Sized, 47 | { 48 | fn name(&self) -> String { 49 | self.step.name() 50 | } 51 | 52 | fn type_name(&self) -> String { 53 | let step_type = std::any::type_name::().to_string(); 54 | format!("{step_type} (via RunnableAsyncStep)",) 55 | } 56 | } 57 | 58 | impl IntoRunnableStep for Step 59 | where 60 | Step: AsyncStep + Send + Sized + 'static, 61 | { 62 | fn into_runnable_step(self) -> impl RunnableStep { 63 | RunnableAsyncStep::new(self) 64 | } 65 | } 66 | 67 | impl RunnableStep for RunnableAsyncStep 68 | where 69 | Step: AsyncStep + Send + Sized + 'static, 70 | { 71 | fn spawn( 72 | self, 73 | input_receiver: Option>>, 74 | output_channel_size: usize, 75 | _input_sender: Option>>, 76 | ) -> ( 77 | InstrumentedAsyncReceiver>, 78 | JoinHandle<()>, 79 | ) { 80 | let mut step = self.step; 81 | let step_name = step.name(); 82 | let input_receiver = input_receiver.expect("Input receiver must be set"); 83 | 84 | let (output_sender, output_receiver) = 85 | instrumented_bounded_channel(&step_name, output_channel_size); 86 | 87 | info!(step_name = step_name, "Spawning processing task"); 88 | let handle = tokio::spawn(async move { 89 | loop { 90 | let input_with_context = match input_receiver.recv().await { 91 | Ok(input_with_context) => input_with_context, 92 | Err(e) => { 93 | // If the previous steps have finished and the channels have closed , we should break out of the loop 94 | warn!( 95 | step_name = step_name, 96 | error = e.to_string(), 97 | "No input received from channel" 98 | ); 99 | break; 100 | }, 101 | }; 102 | let processing_duration = Instant::now(); 103 | let output_with_context = match step.process(input_with_context).await { 104 | Ok(output_with_context) => output_with_context, 105 | Err(e) => { 106 | error!( 107 | step_name = step_name, 108 | error = e.to_string(), 109 | "Failed to process input" 110 | ); 111 | break; 112 | }, 113 | }; 114 | if let Some(output_with_context) = output_with_context { 115 | match StepMetricsBuilder::default() 116 | .labels(StepMetricLabels { 117 | step_name: step.name(), 118 | }) 119 | .latest_processed_version(output_with_context.metadata.end_version) 120 | .processed_transaction_latency( 121 | 
output_with_context.get_transaction_latency(), 122 | ) 123 | .latest_transaction_timestamp( 124 | output_with_context.get_start_transaction_timestamp_unix(), 125 | ) 126 | .num_transactions_processed_count( 127 | output_with_context.get_num_transactions(), 128 | ) 129 | .processing_duration_in_secs(processing_duration.elapsed().as_secs_f64()) 130 | .processed_size_in_bytes(output_with_context.metadata.total_size_in_bytes) 131 | .build() 132 | { 133 | Ok(mut metrics) => metrics.log_metrics(), 134 | Err(e) => { 135 | error!( 136 | step_name = step_name, 137 | error = e.to_string(), 138 | "Failed to log metrics" 139 | ); 140 | break; 141 | }, 142 | } 143 | match output_sender.send(output_with_context).await { 144 | Ok(_) => (), 145 | Err(e) => { 146 | error!( 147 | step_name = step_name, 148 | error = e.to_string(), 149 | "Error sending output to channel" 150 | ); 151 | break; 152 | }, 153 | } 154 | } 155 | } 156 | 157 | // Wait for output channel to be empty before ending the task and closing the send channel 158 | loop { 159 | let channel_size = output_sender.len(); 160 | info!( 161 | step_name = step_name, 162 | channel_size = channel_size, 163 | "Waiting for output channel to be empty" 164 | ); 165 | if channel_size.is_zero() { 166 | break; 167 | } 168 | tokio::time::sleep(Duration::from_millis(100)).await; 169 | } 170 | info!( 171 | step_name = step_name, 172 | "Output channel is empty. Closing send channel." 173 | ); 174 | }); 175 | 176 | (output_receiver, handle) 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/instrumentation.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | pub trait NamedStep { 4 | fn name(&self) -> String; 5 | 6 | fn type_name(&self) -> String { 7 | std::any::type_name::().to_string() 8 | } 9 | } 10 | 11 | pub struct StepInstrumentor 12 | where 13 | Step: NamedStep + Send + Sized + 'static, 14 | { 15 | _step: PhantomData, 16 | } 17 | 18 | impl Default for StepInstrumentor 19 | where 20 | Step: NamedStep + Send + Sized + 'static, 21 | { 22 | fn default() -> Self { 23 | Self { 24 | _step: Default::default(), 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/into_runnable_step.rs: -------------------------------------------------------------------------------- 1 | use crate::traits::{Processable, RunnableStep}; 2 | 3 | pub trait IntoRunnableStep< 4 | Input, 5 | Output, 6 | Step: Processable, 7 | RunnableType = ::RunType, 8 | > where 9 | Self: Send + Sized + 'static, 10 | Input: Send + 'static, 11 | Output: Send + 'static, 12 | { 13 | fn into_runnable_step(self) -> impl RunnableStep; 14 | } 15 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod async_step; 2 | pub mod instrumentation; 3 | pub mod into_runnable_step; 4 | pub mod pollable_async_step; 5 | pub mod processable; 6 | pub mod processor_trait; 7 | pub mod runnable_step; 8 | 9 | // Re-export the structs and traits 10 | pub use async_step::{AsyncRunType, AsyncStep, RunnableAsyncStep}; 11 | pub use instrumentation::NamedStep; 12 | pub use into_runnable_step::IntoRunnableStep; 13 | pub use pollable_async_step::{PollableAsyncRunType, PollableAsyncStep, 
RunnablePollableStep}; 14 | pub use processable::{Processable, RunnableStepType}; 15 | pub use runnable_step::{RunnableStep, RunnableStepWithInputReceiver}; 16 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/processable.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::NamedStep, types::transaction_context::TransactionContext, 3 | utils::errors::ProcessorError, 4 | }; 5 | use anyhow::Result; 6 | use async_trait::async_trait; 7 | 8 | /// Trait to convince the compiler that different step types are mutually exclusive 9 | pub trait RunnableStepType {} 10 | 11 | // This is a dummy implementation for the unit type 12 | impl RunnableStepType for () {} 13 | 14 | #[async_trait] 15 | pub trait Processable 16 | where 17 | Self: NamedStep + Send + Sized + 'static, 18 | { 19 | type Input: Send + 'static; 20 | type Output: Send + 'static; 21 | // This is to convince the compiler of mutual exclusivity of different step impls 22 | type RunType: RunnableStepType; 23 | 24 | /// Lifecycle methods 25 | async fn init(&mut self) {} 26 | async fn cleanup( 27 | &mut self, 28 | ) -> Result>>, ProcessorError> { 29 | Ok(None) 30 | } 31 | 32 | /// Processes a batch of input items and returns a batch of output items. 33 | async fn process( 34 | &mut self, 35 | items: TransactionContext, 36 | ) -> Result>, ProcessorError>; 37 | } 38 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/processor_trait.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | 3 | #[async_trait] 4 | pub trait ProcessorTrait: Send + Sync { 5 | fn name(&self) -> &'static str; 6 | async fn run_processor(&self) -> anyhow::Result<()>; 7 | } 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/runnable_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{traits::NamedStep, types::transaction_context::TransactionContext}; 2 | use instrumented_channel::{InstrumentedAsyncReceiver, InstrumentedAsyncSender}; 3 | use std::marker::PhantomData; 4 | use tokio::task::JoinHandle; 5 | 6 | pub trait RunnableStep: NamedStep 7 | where 8 | Self: Send + Sized + 'static, 9 | Input: Send + 'static, 10 | Output: Send + 'static, 11 | { 12 | #[allow(clippy::too_long_first_doc_paragraph)] 13 | /// Runs the step, forever, with the given input receiver and returns the output receiver and the join handle. 
14 | fn spawn( 15 | self, 16 | input_receiver: Option>>, 17 | output_channel_size: usize, 18 | _input_sender: Option>>, 19 | ) -> ( 20 | InstrumentedAsyncReceiver>, 21 | JoinHandle<()>, 22 | ); 23 | 24 | fn add_input_receiver( 25 | self, 26 | input_receiver: InstrumentedAsyncReceiver>, 27 | ) -> RunnableStepWithInputReceiver { 28 | RunnableStepWithInputReceiver::new(input_receiver, self) 29 | } 30 | 31 | fn type_name(&self) -> String { 32 | ::type_name(self) 33 | } 34 | } 35 | 36 | pub struct RunnableStepWithInputReceiver 37 | where 38 | Input: Send + 'static, 39 | Output: Send + 'static, 40 | Step: RunnableStep, 41 | { 42 | pub input_receiver: InstrumentedAsyncReceiver>, 43 | pub step: Step, 44 | _output: PhantomData, 45 | pub _input_sender: Option>>, 46 | } 47 | 48 | impl RunnableStepWithInputReceiver 49 | where 50 | Input: Send + 'static, 51 | Output: Send + 'static, 52 | Step: RunnableStep, 53 | { 54 | pub fn new( 55 | input_receiver: InstrumentedAsyncReceiver>, 56 | step: Step, 57 | ) -> Self { 58 | Self { 59 | input_receiver, 60 | step, 61 | _output: Default::default(), 62 | _input_sender: None, 63 | } 64 | } 65 | 66 | #[allow(clippy::too_long_first_doc_paragraph)] 67 | /// This should only be used for the inputless first step to keep the async sender in scope so the channel stays alive. 68 | pub fn add_input_sender( 69 | mut self, 70 | _input_sender: InstrumentedAsyncSender>, 71 | ) -> Self { 72 | self._input_sender = Some(_input_sender); 73 | self 74 | } 75 | } 76 | 77 | impl NamedStep for RunnableStepWithInputReceiver 78 | where 79 | Input: 'static + Send, 80 | Output: 'static + Send, 81 | Step: RunnableStep, 82 | { 83 | fn name(&self) -> String { 84 | self.step.name() 85 | } 86 | 87 | fn type_name(&self) -> String { 88 | format!( 89 | "{} (via RunnableStepWithInputReceiver)", 90 | RunnableStep::type_name(&self.step) 91 | ) 92 | } 93 | } 94 | 95 | impl RunnableStep 96 | for RunnableStepWithInputReceiver 97 | where 98 | Input: Send + 'static, 99 | Output: Send + 'static, 100 | Step: RunnableStep, 101 | { 102 | fn spawn( 103 | self, 104 | input_receiver: Option>>, 105 | channel_size: usize, 106 | _input_sender: Option>>, 107 | ) -> ( 108 | InstrumentedAsyncReceiver>, 109 | JoinHandle<()>, 110 | ) { 111 | if input_receiver.is_some() { 112 | panic!("Input receiver already set for {:?}", self.name()); 113 | } 114 | self.step 115 | .spawn(Some(self.input_receiver), channel_size, _input_sender) 116 | } 117 | 118 | fn add_input_receiver( 119 | self, 120 | _input_receiver: InstrumentedAsyncReceiver>, 121 | ) -> RunnableStepWithInputReceiver { 122 | panic!("Input receiver already set for {:?}", self.name()); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/types/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod transaction_context; 2 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/types/transaction_context.rs: -------------------------------------------------------------------------------- 1 | use aptos_indexer_transaction_stream::utils::time::{ 2 | time_diff_since_pb_timestamp_in_secs, timestamp_to_unixtime, 3 | }; 4 | 5 | /// Contains processed data and associated transaction metadata. 
6 | /// 7 | /// The processed data is extracted from transactions and the 8 | /// TransactionContext contains additional metadata about which transactions the extracted 9 | /// data originated from. The metadata is used for metrics and logging purposes. 10 | #[derive(Clone, Default)] 11 | pub struct TransactionContext { 12 | pub data: T, 13 | pub metadata: TransactionMetadata, 14 | } 15 | 16 | impl TransactionContext { 17 | pub fn get_num_transactions(&self) -> u64 { 18 | self.metadata.end_version - self.metadata.start_version + 1 19 | } 20 | 21 | pub fn get_start_transaction_timestamp_unix(&self) -> Option { 22 | self.metadata 23 | .start_transaction_timestamp 24 | .as_ref() 25 | .map(timestamp_to_unixtime) 26 | } 27 | 28 | pub fn get_transaction_latency(&self) -> Option { 29 | self.metadata 30 | .start_transaction_timestamp 31 | .as_ref() 32 | .map(time_diff_since_pb_timestamp_in_secs) 33 | } 34 | } 35 | 36 | impl Ord for TransactionContext { 37 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 38 | self.metadata 39 | .start_version 40 | .cmp(&other.metadata.start_version) 41 | } 42 | } 43 | 44 | impl PartialOrd for TransactionContext { 45 | fn partial_cmp(&self, other: &Self) -> Option { 46 | Some(self.cmp(other)) 47 | } 48 | } 49 | 50 | impl Eq for TransactionContext {} 51 | 52 | impl PartialEq for TransactionContext { 53 | fn eq(&self, other: &Self) -> bool { 54 | self.metadata.start_version == other.metadata.start_version 55 | } 56 | } 57 | 58 | // Metadata about a batch of transactions 59 | #[derive(Clone, Default)] 60 | pub struct TransactionMetadata { 61 | pub start_version: u64, 62 | pub end_version: u64, 63 | pub start_transaction_timestamp: Option, 64 | pub end_transaction_timestamp: Option, 65 | pub total_size_in_bytes: u64, 66 | } 67 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ## Chain ID Check 4 | 5 | The `chain_id_check.rs` file provides tools to manage and verify the chain ID during processing. It helps to ensure the processor is indexing the correct chain ID. 6 | 7 | ### ChainIdChecker Trait 8 | 9 | This trait has two main functions that need to be implemented: 10 | 11 | - `save_chain_id`: Saves the current chain ID to storage. 12 | - `get_chain_id`: Retrieves the chain ID from storage. 13 | 14 | 15 | ### `check_or_update_chain_id` Function 16 | 17 | This function checks if the chain ID from a `TransactionStream` matches the one in storage. If they match, processing continues. If not, it updates the storage with the new chain ID. This helps prevent processing errors due to mismatched chain IDs. 18 | 19 | Use this function in your processor to manage the chain ID. 20 | 21 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/chain_id_check.rs: -------------------------------------------------------------------------------- 1 | use super::errors::ProcessorError; 2 | use anyhow::Result; 3 | use aptos_indexer_transaction_stream::{TransactionStream, TransactionStreamConfig}; 4 | use async_trait::async_trait; 5 | use tracing::info; 6 | 7 | #[async_trait] 8 | pub trait ChainIdChecker { 9 | /// Save the chain ID to storage. This is used to track the chain ID that's being processed 10 | /// and prevents the processor from processing the wrong chain. 
11 | async fn save_chain_id(&self, chain_id: u64) -> Result<()>; 12 | 13 | /// Get the chain ID from storage. This is used to track the chain ID that's being processed 14 | /// and prevents the processor from processing the wrong chain. 15 | async fn get_chain_id(&self) -> Result>; 16 | } 17 | 18 | /// Verify the chain id from TransactionStream against the database. 19 | pub async fn check_or_update_chain_id( 20 | transaction_stream_config: &TransactionStreamConfig, 21 | chain_id_checker: &T, 22 | ) -> Result 23 | where 24 | T: ChainIdChecker, 25 | { 26 | info!("Checking if chain id is correct"); 27 | let maybe_existing_chain_id = 28 | chain_id_checker 29 | .get_chain_id() 30 | .await 31 | .map_err(|e| ProcessorError::ChainIdCheckError { 32 | message: format!("Error getting chain id from db: {e:?}"), 33 | })?; 34 | 35 | let transaction_stream = TransactionStream::new(transaction_stream_config.clone()) 36 | .await 37 | .map_err(|e| ProcessorError::ChainIdCheckError { 38 | message: format!("Error initializing transaction stream: {e:?}"), 39 | })?; 40 | let grpc_chain_id = 41 | transaction_stream 42 | .get_chain_id() 43 | .await 44 | .map_err(|e| ProcessorError::ChainIdCheckError { 45 | message: format!("Error getting chain id from transaction stream: {e:?}"), 46 | })?; 47 | 48 | match maybe_existing_chain_id { 49 | Some(chain_id) => { 50 | if chain_id != grpc_chain_id { 51 | return Err(ProcessorError::ChainIdCheckError { 52 | message: format!( 53 | "Wrong chain id detected! Trying to index chain {grpc_chain_id} now but existing data is for chain {chain_id}", 54 | ), 55 | }); 56 | } 57 | 58 | info!( 59 | chain_id = chain_id, 60 | "Chain id matches! Continue to index...", 61 | ); 62 | Ok(chain_id) 63 | }, 64 | None => { 65 | info!( 66 | chain_id = grpc_chain_id, 67 | "Saving chain id to db, continue to index..." 68 | ); 69 | chain_id_checker 70 | .save_chain_id(grpc_chain_id) 71 | .await 72 | .map_err(|e| ProcessorError::ChainIdCheckError { 73 | message: format!("Error saving chain id to db: {e:?}"), 74 | })?; 75 | Ok(grpc_chain_id) 76 | }, 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/constants.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Constant values useful for indexing. 5 | 6 | use once_cell::sync::Lazy; 7 | 8 | /// Type string for AptosCoin. 9 | pub const APTOS_COIN_TYPE_STR: &str = "0x1::aptos_coin::AptosCoin"; 10 | 11 | pub static APT_METADATA_ADDRESS_RAW: Lazy<[u8; 32]> = Lazy::new(|| { 12 | let mut addr = [0u8; 32]; 13 | addr[31] = 10u8; 14 | addr 15 | }); 16 | 17 | pub static APT_METADATA_ADDRESS_HEX: Lazy = 18 | Lazy::new(|| format!("0x{}", hex::encode(*APT_METADATA_ADDRESS_RAW))); 19 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/convert.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Helpers related to basic conversion like string manipulation, converting between 5 | //! number types, BCS, and hashing. 
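(Editorial aside: a sketch of the expected behavior of the helpers defined in this module, written as if it lived alongside them; it is not a test taken from the repo.)

```rust
#[test]
fn conversion_helper_expectations() {
    // standardize_address left-pads to 64 hex characters behind a 0x prefix.
    assert_eq!(standardize_address("0x1"), format!("0x{:0>64}", "1"));
    // truncate_str keeps at most the requested number of characters.
    assert_eq!(truncate_str("0x1::aptos_coin::AptosCoin", 4), "0x1:");
    // hex_to_raw_bytes tolerates an optional 0x prefix.
    assert_eq!(hex_to_raw_bytes("0x0102").unwrap(), vec![0x01, 0x02]);
}
```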
6 | 7 | use bigdecimal::{BigDecimal, Signed, ToPrimitive, Zero}; 8 | use serde::{Deserialize, Deserializer}; 9 | use serde_json::Value; 10 | use std::str::FromStr; 11 | use tiny_keccak::{Hasher, Sha3}; 12 | 13 | #[allow(clippy::too_long_first_doc_paragraph)] 14 | /// Standardizes an address / table handle to be a string with length 66 (0x+64 length hex string). 15 | pub fn standardize_address(handle: &str) -> String { 16 | if let Some(handle) = handle.strip_prefix("0x") { 17 | format!("0x{handle:0>64}") 18 | } else { 19 | format!("0x{handle:0>64}") 20 | } 21 | } 22 | 23 | #[allow(clippy::too_long_first_doc_paragraph)] 24 | /// Standardizes an address / table handle to be a string with length 66 (0x+64 length hex string). 25 | pub fn standardize_address_from_bytes(bytes: &[u8]) -> String { 26 | let encoded_bytes = hex::encode(bytes); 27 | standardize_address(&encoded_bytes) 28 | } 29 | 30 | /// Convert a hex string into a raw byte string. Any leading 0x will be stripped. 31 | pub fn hex_to_raw_bytes(val: &str) -> anyhow::Result> { 32 | Ok(hex::decode(val.strip_prefix("0x").unwrap_or(val))?) 33 | } 34 | 35 | /// Truncate a string to a maximum number of characters. 36 | pub fn truncate_str(val: &str, max_chars: usize) -> String { 37 | let mut trunc = val.to_string(); 38 | trunc.truncate(max_chars); 39 | trunc 40 | } 41 | 42 | pub fn sha3_256(buffer: &[u8]) -> [u8; 32] { 43 | let mut output = [0; 32]; 44 | let mut sha3 = Sha3::v256(); 45 | sha3.update(buffer); 46 | sha3.finalize(&mut output); 47 | output 48 | } 49 | 50 | pub fn u64_to_bigdecimal(val: u64) -> BigDecimal { 51 | BigDecimal::from(val) 52 | } 53 | 54 | pub fn bigdecimal_to_u64(val: &BigDecimal) -> u64 { 55 | val.to_u64().expect("Unable to convert big decimal to u64") 56 | } 57 | 58 | pub fn ensure_not_negative(val: BigDecimal) -> BigDecimal { 59 | if val.is_negative() { 60 | return BigDecimal::zero(); 61 | } 62 | val 63 | } 64 | 65 | /// Remove null bytes from a JSON object. 
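(Editorial aside: a hedged sketch of what the function defined just below guarantees; the `Row` type is hypothetical.)

```rust
#[derive(serde::Serialize, serde::Deserialize)]
struct Row {
    name: String,
}

#[test]
fn null_bytes_are_stripped() {
    // Null bytes inside string fields are removed before the value is stored.
    let cleaned = remove_null_bytes(&Row {
        name: "abc\u{0000}def".to_string(),
    });
    assert_eq!(cleaned.name, "abcdef");
}
```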
66 | pub fn remove_null_bytes serde::Deserialize<'de>>(input: &T) -> T { 67 | let mut txn_json = serde_json::to_value(input).unwrap(); 68 | recurse_remove_null_bytes_from_json(&mut txn_json); 69 | serde_json::from_value::(txn_json).unwrap() 70 | } 71 | 72 | fn recurse_remove_null_bytes_from_json(sub_json: &mut Value) { 73 | match sub_json { 74 | Value::Array(array) => { 75 | for item in array { 76 | recurse_remove_null_bytes_from_json(item); 77 | } 78 | }, 79 | Value::Object(object) => { 80 | for (_key, value) in object { 81 | recurse_remove_null_bytes_from_json(value); 82 | } 83 | }, 84 | Value::String(str) => { 85 | if !str.is_empty() { 86 | let replacement = string_null_byte_replacement(str); 87 | *str = replacement; 88 | } 89 | }, 90 | _ => {}, 91 | } 92 | } 93 | 94 | fn string_null_byte_replacement(value: &str) -> String { 95 | value.replace('\u{0000}', "").replace("\\u0000", "") 96 | } 97 | 98 | pub fn deserialize_string_from_hexstring<'de, D>( 99 | deserializer: D, 100 | ) -> core::result::Result 101 | where 102 | D: Deserializer<'de>, 103 | { 104 | let s = ::deserialize(deserializer)?; 105 | Ok(String::from_utf8(hex_to_raw_bytes(&s).unwrap()).unwrap_or(s)) 106 | } 107 | 108 | /// Deserialize from string to type T 109 | pub fn deserialize_from_string<'de, D, T>(deserializer: D) -> Result 110 | where 111 | D: Deserializer<'de>, 112 | T: FromStr, 113 | ::Err: std::fmt::Display, 114 | { 115 | use serde::de::Error; 116 | 117 | let s = ::deserialize(deserializer)?; 118 | s.parse::().map_err(D::Error::custom) 119 | } 120 | 121 | /// Convert the bcs serialized vector to its original string format 122 | pub fn convert_bcs_hex(typ: String, value: String) -> Option { 123 | let decoded = hex::decode(value.strip_prefix("0x").unwrap_or(&*value)).ok()?; 124 | 125 | match typ.as_str() { 126 | "0x1::string::String" => bcs::from_bytes::(decoded.as_slice()), 127 | "u8" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 128 | "u64" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 129 | "u128" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 130 | "bool" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 131 | "address" => bcs::from_bytes::(decoded.as_slice()).map(|e| format!("0x{e}")), 132 | _ => Ok(value), 133 | } 134 | .ok() 135 | } 136 | 137 | /// Convert the bcs serialized vector to its original string format for token v2 property map. 
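(Editorial aside: an illustrative check of `convert_bcs_hex` above; the byte string is a hand-written BCS encoding of `1u64`, chosen for illustration only.)

```rust
#[test]
fn decodes_a_bcs_u64() {
    // 1u64 BCS-encodes to eight little-endian bytes: 0x0100000000000000.
    assert_eq!(
        convert_bcs_hex("u64".to_string(), "0x0100000000000000".to_string()),
        Some("1".to_string())
    );
}
```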
138 | pub fn convert_bcs_hex_new(typ: u8, value: String) -> Option { 139 | let decoded = hex::decode(value.strip_prefix("0x").unwrap_or(&*value)).ok()?; 140 | 141 | match typ { 142 | 0 /* bool */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 143 | 1 /* u8 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 144 | 2 /* u16 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 145 | 3 /* u32 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 146 | 4 /* u64 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 147 | 5 /* u128 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 148 | 6 /* u256 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 149 | 7 /* address */ => bcs::from_bytes::(decoded.as_slice()).map(|e| format!("0x{e}")), 150 | 8 /* byte_vector */ => bcs::from_bytes::>(decoded.as_slice()).map(|e| format!("0x{}", hex::encode(e))), 151 | 9 /* string */ => bcs::from_bytes::(decoded.as_slice()), 152 | _ => Ok(value), 153 | } 154 | .ok() 155 | } 156 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | #[derive(Error, Debug)] 4 | pub enum ProcessorError { 5 | #[error("Step Init Error: {message}")] 6 | StepInitError { message: String }, 7 | #[error("Process Error: {message}")] 8 | ProcessError { message: String }, 9 | #[error("Poll Error: {message}")] 10 | PollError { message: String }, 11 | #[error("DB Store Error: {message}, Query: {query:?}")] 12 | DBStoreError { 13 | message: String, 14 | query: Option, 15 | }, 16 | #[error("Chain ID Check Error: {message}")] 17 | ChainIdCheckError { message: String }, 18 | } 19 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod chain_id_check; 2 | pub mod constants; 3 | pub mod convert; 4 | pub mod errors; 5 | pub mod extract; 6 | pub mod property_map; 7 | pub mod step_metrics; 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/property_map.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | use crate::utils::convert::{convert_bcs_hex, convert_bcs_hex_new}; 5 | use ahash::AHashMap; 6 | use indexmap::IndexMap; 7 | use serde::{Deserialize, Serialize}; 8 | use serde_json::{Result, Value}; 9 | #[derive(Serialize, Deserialize, Debug, Clone)] 10 | pub struct PropertyValue { 11 | value: String, 12 | typ: String, 13 | } 14 | 15 | pub fn create_property_value(typ: String, value: String) -> Result { 16 | Ok(PropertyValue { 17 | value: convert_bcs_hex(typ.clone(), value.clone()).unwrap_or(value), 18 | typ, 19 | }) 20 | } 21 | 22 | #[derive(Serialize, Deserialize, Debug, Clone)] 23 | pub struct PropertyMap { 24 | data: IndexMap, 25 | } 26 | 27 | impl PropertyMap { 28 | /// Deserializes PropertyValue from bcs encoded json 29 | pub fn from_bcs_encode_str(val: Value) -> Option { 30 | let mut pm = PropertyMap { 31 | data: IndexMap::new(), 32 | }; 33 | let records: &Vec = val.get("map")?.get("data")?.as_array()?; 34 | for entry in records { 35 | let key = entry.get("key")?.as_str()?; 36 | let 
val = entry.get("value")?.get("value")?.as_str()?; 37 | let typ = entry.get("value")?.get("type")?.as_str()?; 38 | let pv = create_property_value(typ.to_string(), val.to_string()).ok()?; 39 | pm.data.insert(key.to_string(), pv); 40 | } 41 | Some(Self::to_flat_json(pm)) 42 | } 43 | 44 | /// Flattens PropertyMap, which can't be easily consumed by downstream. 45 | /// For example: Object {"data": Object {"creation_time_sec": Object {"value": String("1666125588")}}} 46 | /// becomes Object {"creation_time_sec": "1666125588"} 47 | fn to_flat_json(val: PropertyMap) -> Value { 48 | let mut map = AHashMap::new(); 49 | for (k, v) in val.data { 50 | map.insert(k, v.value); 51 | } 52 | serde_json::to_value(map).unwrap() 53 | } 54 | } 55 | 56 | #[derive(Serialize, Deserialize, Debug, Clone)] 57 | pub struct TokenObjectPropertyValue { 58 | value: String, 59 | typ: u8, 60 | } 61 | 62 | pub fn create_token_object_property_value( 63 | typ: u8, 64 | value: String, 65 | ) -> Result<TokenObjectPropertyValue> { 66 | Ok(TokenObjectPropertyValue { 67 | value: convert_bcs_hex_new(typ, value.clone()).unwrap_or(value), 68 | typ, 69 | }) 70 | } 71 | 72 | #[derive(Serialize, Deserialize, Debug, Clone)] 73 | pub struct TokenObjectPropertyMap { 74 | data: IndexMap<String, TokenObjectPropertyValue>, 75 | } 76 | 77 | impl TokenObjectPropertyMap { 78 | /// Deserializes PropertyValue from bcs encoded json 79 | pub fn from_bcs_encode_str(val: Value) -> Option<Value> { 80 | let mut pm = TokenObjectPropertyMap { 81 | data: IndexMap::new(), 82 | }; 83 | let records: &Vec<Value> = val.get("data")?.as_array()?; 84 | for entry in records { 85 | let key = entry.get("key")?.as_str()?; 86 | let val = entry.get("value")?.get("value")?.as_str()?; 87 | let typ = entry.get("value")?.get("type")?.as_u64()?; 88 | let pv = create_token_object_property_value(typ as u8, val.to_string()).ok()?; 89 | pm.data.insert(key.to_string(), pv); 90 | } 91 | Some(Self::to_flat_json_new(pm)) 92 | } 93 | 94 | /// Flattens PropertyMap, which can't be easily consumed by downstream.
95 | /// For example: Object {"data": Object {"creation_time_sec": Object {"value": String("1666125588")}}} 96 | /// becomes Object {"creation_time_sec": "1666125588"} 97 | fn to_flat_json_new(val: TokenObjectPropertyMap) -> Value { 98 | let mut map = IndexMap::new(); 99 | for (k, v) in val.data { 100 | map.insert(k, v.value); 101 | } 102 | serde_json::to_value(map).unwrap() 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-indexer-transaction-stream" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | anyhow = { workspace = true } 17 | aptos-moving-average = { workspace = true } 18 | aptos-protos = { workspace = true } 19 | aptos-transaction-filter = { workspace = true } 20 | chrono = { workspace = true } 21 | futures-util = { workspace = true } 22 | once_cell = { workspace = true } 23 | prometheus = { workspace = true } 24 | prost = { workspace = true } 25 | sample = { workspace = true } 26 | serde = { workspace = true } 27 | tokio = { workspace = true } 28 | tonic = { workspace = true } 29 | tracing = { workspace = true } 30 | url = { workspace = true } 31 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/config.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::additional_headers::AdditionalHeaders; 2 | use aptos_transaction_filter::BooleanTransactionFilter; 3 | use serde::{Deserialize, Serialize}; 4 | use std::time::Duration; 5 | use url::Url; 6 | 7 | #[derive(Clone, Debug, Deserialize, Serialize)] 8 | #[serde(deny_unknown_fields)] 9 | pub struct TransactionStreamConfig { 10 | pub indexer_grpc_data_service_address: Url, 11 | pub starting_version: Option<u64>, 12 | pub request_ending_version: Option<u64>, 13 | pub auth_token: String, 14 | pub request_name_header: String, 15 | #[serde(default)] 16 | pub additional_headers: AdditionalHeaders, 17 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_http2_ping_interval")] 18 | pub indexer_grpc_http2_ping_interval_secs: u64, 19 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_http2_ping_timeout")] 20 | pub indexer_grpc_http2_ping_timeout_secs: u64, 21 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_reconnection_timeout")] 22 | pub indexer_grpc_reconnection_timeout_secs: u64, 23 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_response_item_timeout")] 24 | pub indexer_grpc_response_item_timeout_secs: u64, 25 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_reconnection_max_retries")] 26 | pub indexer_grpc_reconnection_max_retries: u64, 27 | #[serde(default)] 28 | pub transaction_filter: Option<BooleanTransactionFilter>, 29 | } 30 | 31 | impl TransactionStreamConfig { 32 | pub const fn indexer_grpc_http2_ping_interval(&self) -> Duration { 33 | Duration::from_secs(self.indexer_grpc_http2_ping_interval_secs) 34 | } 35 | 36 | pub const fn
indexer_grpc_http2_ping_timeout(&self) -> Duration { 37 | Duration::from_secs(self.indexer_grpc_http2_ping_timeout_secs) 38 | } 39 | 40 | pub const fn indexer_grpc_reconnection_timeout(&self) -> Duration { 41 | Duration::from_secs(self.indexer_grpc_reconnection_timeout_secs) 42 | } 43 | 44 | pub const fn indexer_grpc_response_item_timeout(&self) -> Duration { 45 | Duration::from_secs(self.indexer_grpc_response_item_timeout_secs) 46 | } 47 | 48 | /// Indexer GRPC http2 ping interval in seconds. Defaults to 30. 49 | /// Tonic ref: https://docs.rs/tonic/latest/tonic/transport/channel/struct.Endpoint.html#method.http2_keep_alive_interval 50 | pub const fn default_indexer_grpc_http2_ping_interval() -> u64 { 51 | 30 52 | } 53 | 54 | /// Indexer GRPC http2 ping timeout in seconds. Defaults to 10. 55 | pub const fn default_indexer_grpc_http2_ping_timeout() -> u64 { 56 | 10 57 | } 58 | 59 | /// Default timeout for establishing a grpc connection. Defaults to 5 seconds. 60 | pub const fn default_indexer_grpc_reconnection_timeout() -> u64 { 61 | 5 62 | } 63 | 64 | /// Default timeout for receiving an item from grpc stream. Defaults to 60 seconds. 65 | pub const fn default_indexer_grpc_response_item_timeout() -> u64 { 66 | 60 67 | } 68 | 69 | /// Default max retries for reconnecting to grpc. Defaults to 5. 70 | pub const fn default_indexer_grpc_reconnection_max_retries() -> u64 { 71 | 5 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod config; 2 | pub mod transaction_stream; 3 | pub mod utils; 4 | 5 | pub use aptos_transaction_filter::*; 6 | pub use config::TransactionStreamConfig; 7 | pub use transaction_stream::{TransactionStream, TransactionsPBResponse}; 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/additional_headers.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use serde::{Deserialize, Serialize}; 3 | use std::{collections::HashMap, str::FromStr}; 4 | use tonic::metadata::{Ascii, MetadataKey, MetadataMap, MetadataValue}; 5 | 6 | #[allow(clippy::too_long_first_doc_paragraph)] 7 | /// This struct holds additional headers that we attach to the request metadata. 8 | /// Regarding serde, we just serialize this as we would a HashMap. 9 | /// Similarly, we expect that format when deserializing. 10 | /// 11 | /// It is necessary to use HashMap because there is no extend method on MetadataMap 12 | /// itself, nor does it implement Serialize / Deserialize. It is better to parse once 13 | /// here right at config validation time anyway; it exposes any error as early as 14 | /// possible and saves us from parsing (perhaps multiple times) later. 15 | #[derive(Clone, Debug, Default, Serialize, Deserialize)] 16 | #[serde(try_from = "HashMap<String, String>")] 17 | #[serde(into = "HashMap<String, String>")] 18 | pub struct AdditionalHeaders(HashMap<MetadataKey<Ascii>, MetadataValue<Ascii>>); 19 | 20 | impl AdditionalHeaders { 21 | pub fn drain_into_metadata_map(self, metadata_map: &mut MetadataMap) { 22 | for (key, value) in self.0 { 23 | metadata_map.insert(key, value); 24 | } 25 | } 26 | } 27 | 28 | impl TryFrom<HashMap<String, String>> for AdditionalHeaders { 29 | type Error = anyhow::Error; 30 | 31 | /// Build `AdditionalHeaders` from just a map of strings.
This can fail if the 32 | /// strings contain invalid characters for metadata keys / values; the chars must 33 | /// only be visible ascii characters. 34 | fn try_from(map: HashMap<String, String>) -> Result<Self> { 35 | let mut out = HashMap::new(); 36 | for (k, v) in map { 37 | let k = MetadataKey::from_str(&k) 38 | .with_context(|| format!("Failed to parse key as ascii metadata key: {k}"))?; 39 | let v = MetadataValue::from_str(&v) 40 | .with_context(|| format!("Failed to parse value as ascii metadata value: {v}"))?; 41 | out.insert(k, v); 42 | } 43 | Ok(AdditionalHeaders(out)) 44 | } 45 | } 46 | 47 | impl From<AdditionalHeaders> for HashMap<String, String> { 48 | fn from(headers: AdditionalHeaders) -> Self { 49 | headers 50 | .0 51 | .into_iter() 52 | // It is safe to unwrap here because when building this we asserted that the 53 | // MetadataValue only contained visible ascii characters. 54 | .map(|(k, v)| (k.as_str().to_owned(), v.to_str().unwrap().to_owned())) 55 | .collect() 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod additional_headers; 2 | pub mod time; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/time.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Helpers related to dealing with dates and times. 5 | 6 | use aptos_protos::util::timestamp::Timestamp; 7 | use chrono::Utc; 8 | 9 | /// 9999-12-31 23:59:59; this is the max supported by Google BigQuery. 10 | pub const MAX_TIMESTAMP_SECS: i64 = 253_402_300_799; 11 | 12 | pub fn parse_timestamp(ts: &Timestamp, version: i64) -> chrono::DateTime<Utc> { 13 | let final_ts = if ts.seconds >= MAX_TIMESTAMP_SECS || ts.seconds < 0 { 14 | Timestamp { 15 | seconds: MAX_TIMESTAMP_SECS, 16 | nanos: 0, 17 | } 18 | } else { 19 | *ts 20 | }; 21 | chrono::DateTime::from_timestamp(final_ts.seconds, final_ts.nanos as u32) 22 | .unwrap_or_else(|| panic!("Could not parse timestamp {ts:?} for version {version}")) 23 | } 24 | 25 | pub fn parse_timestamp_secs(ts: u64, version: i64) -> chrono::DateTime<Utc> { 26 | chrono::DateTime::from_timestamp(std::cmp::min(ts, MAX_TIMESTAMP_SECS as u64) as i64, 0) 27 | .unwrap_or_else(|| panic!("Could not parse timestamp {ts:?} for version {version}")) 28 | } 29 | 30 | pub fn compute_nanos_since_epoch(datetime: chrono::DateTime<Utc>) -> u64 { 31 | // The Unix epoch is 1970-01-01T00:00:00Z 32 | let unix_epoch = chrono::DateTime::<Utc>::from_timestamp(0, 0).unwrap(); 33 | let duration_since_epoch = datetime.signed_duration_since(unix_epoch); 34 | 35 | // Convert the duration to nanoseconds and return 36 | duration_since_epoch.num_seconds() as u64 * 1_000_000_000 37 | + duration_since_epoch.subsec_nanos() as u64 38 | } 39 | 40 | /// Convert the protobuf Timestamp to epoch time in seconds.
41 | pub fn time_diff_since_pb_timestamp_in_secs(timestamp: &Timestamp) -> f64 { 42 | let current_timestamp = std::time::SystemTime::now() 43 | .duration_since(std::time::UNIX_EPOCH) 44 | .expect("SystemTime before UNIX EPOCH!") 45 | .as_secs_f64(); 46 | let transaction_time = timestamp.seconds as f64 + timestamp.nanos as f64 * 1e-9; 47 | current_timestamp - transaction_time 48 | } 49 | 50 | /// Convert the protobuf timestamp to ISO format 51 | pub fn timestamp_to_iso(timestamp: &Timestamp) -> String { 52 | let dt = parse_timestamp(timestamp, 0); 53 | dt.format("%Y-%m-%dT%H:%M:%S%.9fZ").to_string() 54 | } 55 | 56 | /// Convert the protobuf timestamp to unixtime 57 | pub fn timestamp_to_unixtime(timestamp: &Timestamp) -> f64 { 58 | timestamp.seconds as f64 + timestamp.nanos as f64 * 1e-9 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | use chrono::Datelike; 65 | 66 | #[test] 67 | fn test_parse_timestamp() { 68 | let ts = parse_timestamp( 69 | &Timestamp { 70 | seconds: 1649560602, 71 | nanos: 0, 72 | }, 73 | 1, 74 | ) 75 | .naive_utc(); 76 | assert_eq!(ts.and_utc().timestamp(), 1649560602); 77 | assert_eq!(ts.year(), 2022); 78 | 79 | let too_high_ts = parse_timestamp( 80 | &Timestamp { 81 | seconds: u64::MAX as i64, // Convert a really big number to i64 82 | nanos: 0, 83 | }, 84 | 1, 85 | ); 86 | let max_ts = parse_timestamp( 87 | &Timestamp { 88 | seconds: MAX_TIMESTAMP_SECS, 89 | nanos: 0, 90 | }, 91 | 1, 92 | ); 93 | assert_eq!(too_high_ts, max_ts); 94 | 95 | let ts2 = parse_timestamp_secs(600000000000000, 2); 96 | assert_eq!(ts2.year(), 9999); 97 | 98 | let ts3 = parse_timestamp_secs(1659386386, 2); 99 | assert_eq!(ts3.timestamp(), 1659386386); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /examples/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xclippy = [ 3 | "clippy", 4 | "--workspace", 5 | "--all-targets", 6 | "--", 7 | "-Dwarnings", 8 | "-Wclippy::all", 9 | "-Aclippy::upper_case_acronyms", 10 | "-Aclippy::enum-variant-names", 11 | "-Aclippy::result-large-err", 12 | "-Aclippy::mutable-key-type", 13 | "-Aclippy::map_identity", # We temporarily ignore this due to: https://github.com/rust-lang/rust-clippy/issues/11764 14 | ] 15 | 16 | [build] 17 | rustflags = [ 18 | "--cfg", 19 | "tokio_unstable", 20 | "-C", 21 | "force-frame-pointers=yes", 22 | "-C", 23 | "force-unwind-tables=yes", 24 | ] 25 | 26 | # TODO(grao): Figure out whether we should enable other cpu features, and whether we should use a different way to configure them rather than list every single one here. 
27 | #[target.x86_64-unknown-linux-gnu] 28 | #rustflags = ["--cfg", "tokio_unstable", "-C", "link-arg=-fuse-ld=lld", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "target-feature=+sse4.2"] 29 | 30 | # 64 bit MSVC 31 | #[target.x86_64-pc-windows-msvc] 32 | #rustflags = ["--cfg", "tokio_unstable", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "link-arg=/STACK:8000000" # Set stack to 8 MB] 33 | -------------------------------------------------------------------------------- /examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | 4 | members = ["postgres-basic-events-example"] 5 | 6 | [workspace.package] 7 | authors = ["Aptos Labs "] 8 | edition = "2021" 9 | homepage = "https://aptoslabs.com" 10 | license = "Apache-2.0" 11 | publish = false 12 | repository = "https://github.com/aptos-labs/aptos-indexer-processor-sdk" 13 | rust-version = "1.78" 14 | 15 | [workspace.dependencies] 16 | postgres-basic-events-example = { path = "postgres-basic-events-example" } 17 | 18 | aptos-indexer-processor-sdk = { path = "../aptos-indexer-processors-sdk/sdk", features = [ 19 | "postgres_full", 20 | ] } 21 | anyhow = "1.0.86" 22 | async-trait = "0.1.80" 23 | clap = { version = "4.3.5", features = ["derive", "unstable-styles"] } 24 | diesel = { version = "=2.2.0", features = [ 25 | "chrono", 26 | "postgres_backend", 27 | "numeric", 28 | "serde_json", 29 | ] } 30 | diesel_migrations = { version = "2.1.0", features = ["postgres"] } 31 | field_count = "0.1.1" 32 | rayon = "1.10.0" 33 | sample = { path = "../aptos-indexer-processors-sdk/sample" } 34 | serde = { version = "1.0.193", features = ["derive", "rc"] } 35 | serde_json = { version = "1.0.81", features = ["preserve_order"] } 36 | tokio = { version = "1.37.0", features = ["full"] } 37 | tracing = "0.1.34" 38 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "postgres-basic-events-example" 3 | version = "1.0.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | anyhow = { workspace = true } 16 | aptos-indexer-processor-sdk = { workspace = true } 17 | async-trait = { workspace = true } 18 | clap = { workspace = true } 19 | diesel = { workspace = true } 20 | diesel_migrations = { workspace = true } 21 | field_count = { workspace = true } 22 | rayon = { workspace = true } 23 | serde = { workspace = true } 24 | serde_json = { workspace = true } 25 | tokio = { workspace = true } 26 | tracing = { workspace = true } 27 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/README.md: -------------------------------------------------------------------------------- 1 | # Example Postgres events processor 2 | 3 | ## About 4 | 5 | A basic processor that indexes events into Postgres. It uses the `process_function` utility function. 6 | 7 | ## How to use 8 | 1. Install Postgres and Diesel CLI 9 | 2. Construct a `config.yaml` file. You can see `postgres-basic-events-example/example-config.yaml` as an example. 10 | 3. 
cd ~/aptos-indexer-processors-sdk/example 11 | 4. cargo run -p postgres-basic-events-example -- -c postgres-basic-events-example/example-config.yaml 12 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/example-config.yaml: -------------------------------------------------------------------------------- 1 | # This is a template yaml for the processor 2 | health_check_port: 8085 3 | server_config: 4 | transaction_stream_config: 5 | indexer_grpc_data_service_address: "https://grpc.mainnet.aptoslabs.com:443" 6 | auth_token: "AUTH_TOKEN" 7 | request_name_header: "events-processor" 8 | starting_version: 0 9 | postgres_config: 10 | connection_string: postgresql://postgres:@localhost:5432/example 11 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/diesel.toml: -------------------------------------------------------------------------------- 1 | # For documentation on how to configure this file, 2 | # see https://diesel.rs/guides/configuring-diesel-cli 3 | 4 | [print_schema] 5 | file = "schema.rs" 6 | 7 | [migrations_directory] 8 | dir = "migrations" 9 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/00000000000000_diesel_initial_setup/down.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 4 | 5 | DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); 6 | DROP FUNCTION IF EXISTS diesel_set_updated_at(); 7 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/00000000000000_diesel_initial_setup/up.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 
4 | 5 | 6 | 7 | 8 | -- Sets up a trigger for the given table to automatically set a column called 9 | -- `updated_at` whenever the row is modified (unless `updated_at` was included 10 | -- in the modified columns) 11 | -- 12 | -- # Example 13 | -- 14 | -- ```sql 15 | -- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); 16 | -- 17 | -- SELECT diesel_manage_updated_at('users'); 18 | -- ``` 19 | CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ 20 | BEGIN 21 | EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s 22 | FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); 23 | END; 24 | $$ LANGUAGE plpgsql; 25 | 26 | CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ 27 | BEGIN 28 | IF ( 29 | NEW IS DISTINCT FROM OLD AND 30 | NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at 31 | ) THEN 32 | NEW.updated_at := current_timestamp; 33 | END IF; 34 | RETURN NEW; 35 | END; 36 | $$ LANGUAGE plpgsql; 37 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/2025-03-06-231718_create_events/down.sql: -------------------------------------------------------------------------------- 1 | -- This file should undo anything in `up.sql` 2 | DROP TABLE IF EXISTS events; 3 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/2025-03-06-231718_create_events/up.sql: -------------------------------------------------------------------------------- 1 | -- Your SQL goes here 2 | CREATE TABLE events ( 3 | sequence_number BIGINT NOT NULL, 4 | creation_number BIGINT NOT NULL, 5 | account_address VARCHAR(66) NOT NULL, 6 | transaction_version BIGINT NOT NULL, 7 | transaction_block_height BIGINT NOT NULL, 8 | type TEXT NOT NULL, 9 | data JSONB NOT NULL, 10 | inserted_at TIMESTAMP NOT NULL DEFAULT NOW(), 11 | event_index BIGINT NOT NULL, 12 | indexed_type VARCHAR(300) NOT NULL, 13 | PRIMARY KEY (transaction_version, event_index) 14 | ); -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/schema.rs: -------------------------------------------------------------------------------- 1 | // @generated automatically by Diesel CLI. 2 | 3 | diesel::table! 
{ 4 | events (transaction_version, event_index) { 5 | sequence_number -> Int8, 6 | creation_number -> Int8, 7 | #[max_length = 66] 8 | account_address -> Varchar, 9 | transaction_version -> Int8, 10 | transaction_block_height -> Int8, 11 | #[sql_name = "type"] 12 | type_ -> Text, 13 | data -> Jsonb, 14 | inserted_at -> Timestamp, 15 | event_index -> Int8, 16 | #[max_length = 300] 17 | indexed_type -> Varchar, 18 | } 19 | } 20 | 21 | diesel::allow_tables_to_appear_in_same_query!(events,); 22 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/events_model.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::schema::events; 7 | use aptos_indexer_processor_sdk::{ 8 | aptos_protos::transaction::v1::Event as EventPB, 9 | utils::convert::{standardize_address, truncate_str}, 10 | }; 11 | use diesel::{Identifiable, Insertable}; 12 | use field_count::FieldCount; 13 | use serde::{Deserialize, Serialize}; 14 | 15 | // p99 currently is 303 so using 300 as a safe max length 16 | const EVENT_TYPE_MAX_LENGTH: usize = 300; 17 | 18 | #[derive(Clone, Debug, Deserialize, FieldCount, Identifiable, Insertable, Serialize)] 19 | #[diesel(primary_key(transaction_version, event_index))] 20 | #[diesel(table_name = events)] 21 | pub struct Event { 22 | pub sequence_number: i64, 23 | pub creation_number: i64, 24 | pub account_address: String, 25 | pub transaction_version: i64, 26 | pub transaction_block_height: i64, 27 | pub type_: String, 28 | pub data: serde_json::Value, 29 | pub event_index: i64, 30 | pub indexed_type: String, 31 | } 32 | 33 | impl Event { 34 | pub fn from_event( 35 | event: &EventPB, 36 | transaction_version: i64, 37 | transaction_block_height: i64, 38 | event_index: i64, 39 | ) -> Self { 40 | let t: &str = event.type_str.as_ref(); 41 | Event { 42 | account_address: standardize_address( 43 | event.key.as_ref().unwrap().account_address.as_str(), 44 | ), 45 | creation_number: event.key.as_ref().unwrap().creation_number as i64, 46 | sequence_number: event.sequence_number as i64, 47 | transaction_version, 48 | transaction_block_height, 49 | type_: t.to_string(), 50 | data: serde_json::from_str(event.data.as_str()).unwrap(), 51 | event_index, 52 | indexed_type: truncate_str(t, EVENT_TYPE_MAX_LENGTH), 53 | } 54 | } 55 | 56 | pub fn from_events( 57 | events: &[EventPB], 58 | transaction_version: i64, 59 | transaction_block_height: i64, 60 | ) -> Vec { 61 | events 62 | .iter() 63 | .enumerate() 64 | .map(|(index, event)| { 65 | Self::from_event( 66 | event, 67 | transaction_version, 68 | transaction_block_height, 69 | index as i64, 70 | ) 71 | }) 72 | .collect::>() 73 | } 74 | } 75 | 76 | // Prevent conflicts with other things named `Event` 77 | pub type EventModel = Event; 78 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/main.rs: -------------------------------------------------------------------------------- 1 | use crate::events_model::EventModel; 2 | use anyhow::Result; 3 | use aptos_indexer_processor_sdk::{ 4 | aptos_protos::transaction::v1::transaction::TxnData, 5 | postgres::{ 6 | basic_processor::process, 7 | utils::database::{execute_in_chunks, MAX_DIESEL_PARAM_SIZE}, 8 | }, 9 | }; 10 | use diesel::{pg::Pg, query_builder::QueryFragment}; 11 | use 
diesel_migrations::{embed_migrations, EmbeddedMigrations}; 12 | use field_count::FieldCount; 13 | use rayon::prelude::*; 14 | use tracing::{error, info, warn}; 15 | 16 | pub mod events_model; 17 | #[path = "db/schema.rs"] 18 | pub mod schema; 19 | 20 | const MIGRATIONS: EmbeddedMigrations = embed_migrations!("src/db/migrations"); 21 | 22 | fn insert_events_query( 23 | items_to_insert: Vec, 24 | ) -> impl QueryFragment + diesel::query_builder::QueryId + Send { 25 | use crate::schema::events::dsl::*; 26 | diesel::insert_into(crate::schema::events::table) 27 | .values(items_to_insert) 28 | .on_conflict((transaction_version, event_index)) 29 | .do_nothing() 30 | } 31 | 32 | #[tokio::main] 33 | async fn main() -> Result<()> { 34 | process( 35 | "events_processor".to_string(), 36 | MIGRATIONS, 37 | async |transactions, conn_pool| { 38 | let events = transactions 39 | .par_iter() 40 | .map(|txn| { 41 | let txn_version = txn.version as i64; 42 | let block_height = txn.block_height as i64; 43 | let txn_data = match txn.txn_data.as_ref() { 44 | Some(data) => data, 45 | None => { 46 | warn!( 47 | transaction_version = txn_version, 48 | "Transaction data doesn't exist" 49 | ); 50 | return vec![]; 51 | }, 52 | }; 53 | let default = vec![]; 54 | let raw_events = match txn_data { 55 | TxnData::BlockMetadata(tx_inner) => &tx_inner.events, 56 | TxnData::Genesis(tx_inner) => &tx_inner.events, 57 | TxnData::User(tx_inner) => &tx_inner.events, 58 | _ => &default, 59 | }; 60 | 61 | EventModel::from_events(raw_events, txn_version, block_height) 62 | }) 63 | .flatten() 64 | .collect::>(); 65 | 66 | // Store events in the database 67 | let execute_res = execute_in_chunks( 68 | conn_pool.clone(), 69 | insert_events_query, 70 | &events, 71 | MAX_DIESEL_PARAM_SIZE / EventModel::field_count(), 72 | ) 73 | .await; 74 | match execute_res { 75 | Ok(_) => { 76 | info!( 77 | "Events version [{}, {}] stored successfully", 78 | transactions.first().unwrap().version, 79 | transactions.last().unwrap().version 80 | ); 81 | Ok(()) 82 | }, 83 | Err(e) => { 84 | error!("Failed to store events: {:?}", e); 85 | Err(e) 86 | }, 87 | } 88 | }, 89 | ) 90 | .await?; 91 | Ok(()) 92 | } 93 | -------------------------------------------------------------------------------- /examples/rustfmt.toml: -------------------------------------------------------------------------------- 1 | combine_control_expr = false 2 | edition = "2021" 3 | imports_granularity = "Crate" 4 | format_macro_matchers = true 5 | group_imports = "One" 6 | hex_literal_case = "Upper" 7 | match_block_trailing_comma = true 8 | newline_style = "Unix" 9 | overflow_delimited_expr = true 10 | reorder_impl_items = true 11 | use_field_init_shorthand = true 12 | -------------------------------------------------------------------------------- /scripts/check_banned_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script checks if the crates in both examples and aptos-indexer-processors-sdk 4 | # depend on external deps that they shouldn't. We run this in CI to make sure we don't 5 | # accidentally reintroduce deps that would make the crates unusable for the CLI. 6 | # 7 | # While it would be more reliable to actually build the crate and check what libraries 8 | # it links to, e.g. with otool, it is much cheaper to use cargo tree. As far as I can 9 | # tell the entire Rust ecosystem makes use of these `x-sys` libraries to depend on 10 | # external dynamically linked libraries. 
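# (For a rough manual spot-check of the same idea, a sketch rather than part of the CI flow:
#  running `cargo tree --features postgres_partial -e features,no-build,no-dev -i pq-sys`
#  inside aptos-indexer-processors-sdk exits non-zero when the crate is absent from the graph.)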
11 | # 12 | # Run this from the root directory of the project. 13 | 14 | # Make sure we're in the root directory 15 | if [ ! -d "examples" ] || [ ! -d "aptos-indexer-processors-sdk" ]; then 16 | echo "Please run this script from the root directory of the project" 17 | exit 1 18 | fi 19 | 20 | # We only run the check on the SDK since that's the only crate used by the CLI. 21 | cd "aptos-indexer-processors-sdk" 22 | 23 | declare -a deps=("pq-sys" "openssl-sys") 24 | 25 | for dep in "${deps[@]}"; do 26 | echo "Checking for banned dependency $dep..." 27 | 28 | # Check for deps. As you can see, we only check for MacOS right now. 29 | # We specify --features postgres_partial because we only care about these banned deps 30 | # for the local testnet use case, in which case it uses only a subset of the 31 | # postgres features that don't include pq-sys. 32 | out=`cargo tree --features postgres_partial -e features,no-build,no-dev --target aarch64-apple-darwin -i "$dep"` 33 | 34 | # If the exit status was non-zero, great, the dep couldn't be found. 35 | if [ $? -ne 0 ]; then 36 | continue 37 | fi 38 | 39 | # If the exit status was zero we have to check the output to see if the dep is in 40 | # use. If it is in the output, it is in use. 41 | if [[ $out != *"$dep"* ]]; then 42 | continue 43 | fi 44 | 45 | echo "Banned dependency $dep found in $dir!" 46 | cd ../.. 47 | exit 1 48 | done 49 | 50 | echo "None of the banned dependencies are in use in $dir, great!" 51 | 52 | exit 0 -------------------------------------------------------------------------------- /scripts/rust_lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script runs linting for both the examples and aptos-indexer-processors-sdk directories 4 | # It assumes you have cargo-sort installed. If not, you can install it with: 5 | # cargo install cargo-sort 6 | 7 | set -e 8 | 9 | # Function to run linting in a directory 10 | run_lint() { 11 | local dir=$1 12 | echo "Running lint in $dir..." 13 | 14 | # Change to the directory 15 | cd "$dir" 16 | 17 | # Run in check mode if requested 18 | CHECK_ARG="" 19 | if [ "$1" = "--check" ]; then 20 | CHECK_ARG="--check" 21 | fi 22 | 23 | # Run the linting commands 24 | cargo +nightly xclippy 25 | cargo +nightly fmt $CHECK_ARG 26 | cargo sort --grouped --workspace $CHECK_ARG 27 | 28 | # Return to the original directory 29 | cd .. 30 | } 31 | 32 | # Make sure we're in the root directory 33 | if [ ! -d "examples" ] || [ ! -d "aptos-indexer-processors-sdk" ]; then 34 | echo "Please run this script from the root directory of the project" 35 | exit 1 36 | fi 37 | 38 | # Run linting for both directories 39 | echo "Starting linting process..." 40 | 41 | echo "\nLinting examples directory..." 42 | run_lint "examples" 43 | 44 | echo "\nLinting aptos-indexer-processors-sdk directory..." 45 | run_lint "aptos-indexer-processors-sdk" 46 | 47 | echo "\nLinting completed successfully!" --------------------------------------------------------------------------------
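Usage sketch for the two helper scripts above (an assumption-laden example, not part of the repository files: it presumes a Unix shell at the repository root with the nightly toolchain and cargo-sort installed, which the scripts themselves expect):

    bash scripts/rust_lint.sh
    bash scripts/check_banned_deps.sh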