├── .github ├── CODEOWNERS └── workflows │ ├── lint.yaml │ ├── release.yaml │ ├── tests.yaml │ ├── update-processor-sdk-version-legacy.yaml │ ├── update-processor-sdk-version.yaml │ └── update-proto-dependency.yaml ├── .gitignore ├── README.md ├── aptos-indexer-processors-sdk ├── .cargo │ └── config.toml ├── Cargo.lock ├── Cargo.toml ├── instrumented-channel │ ├── Cargo.lock │ ├── Cargo.toml │ └── src │ │ ├── channel_metrics.rs │ │ └── lib.rs ├── moving-average │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── rustfmt.toml ├── sample │ ├── Cargo.toml │ └── src │ │ └── lib.rs ├── sdk │ ├── Cargo.toml │ └── src │ │ ├── builder │ │ ├── dag.rs │ │ ├── mod.rs │ │ └── processor_builder.rs │ │ ├── common_steps │ │ ├── arcify_step.rs │ │ ├── mod.rs │ │ ├── order_by_version_step.rs │ │ ├── timed_buffer_step.rs │ │ ├── transaction_stream_step.rs │ │ ├── version_tracker_step.rs │ │ └── write_rate_limit_step.rs │ │ ├── lib.rs │ │ ├── postgres │ │ ├── README.md │ │ ├── basic_processor │ │ │ ├── README.md │ │ │ ├── basic_processor_function.rs │ │ │ ├── basic_processor_step.rs │ │ │ └── mod.rs │ │ ├── db │ │ │ ├── diesel.toml │ │ │ ├── migrations │ │ │ │ ├── 00000000000000_diesel_initial_setup │ │ │ │ │ ├── down.sql │ │ │ │ │ └── up.sql │ │ │ │ └── 2025-03-06-201942_create_core_schema │ │ │ │ │ ├── down.sql │ │ │ │ │ └── up.sql │ │ │ └── processor_metadata_schema.rs │ │ ├── mod.rs │ │ ├── models │ │ │ ├── ledger_info.rs │ │ │ ├── mod.rs │ │ │ └── processor_status.rs │ │ ├── subconfigs │ │ │ ├── mod.rs │ │ │ └── postgres_config.rs │ │ └── utils │ │ │ ├── checkpoint.rs │ │ │ ├── database.rs │ │ │ └── mod.rs │ │ ├── server_framework.rs │ │ ├── test │ │ ├── mod.rs │ │ ├── steps │ │ │ ├── mod.rs │ │ │ └── pass_through_step.rs │ │ └── utils.rs │ │ ├── testing_framework │ │ ├── cli_parser.rs │ │ ├── database.rs │ │ ├── mock_grpc.rs │ │ ├── mod.rs │ │ └── sdk_test_context.rs │ │ ├── traits │ │ ├── README.md │ │ ├── async_step.rs │ │ ├── instrumentation.rs │ │ ├── into_runnable_step.rs │ │ ├── mod.rs │ │ ├── pollable_async_step.rs │ │ ├── processable.rs │ │ ├── processor_trait.rs │ │ └── runnable_step.rs │ │ ├── types │ │ ├── mod.rs │ │ └── transaction_context.rs │ │ └── utils │ │ ├── README.md │ │ ├── chain_id_check.rs │ │ ├── constants.rs │ │ ├── convert.rs │ │ ├── errors.rs │ │ ├── extract.rs │ │ ├── mod.rs │ │ ├── property_map.rs │ │ └── step_metrics.rs └── transaction-stream │ ├── Cargo.toml │ └── src │ ├── config.rs │ ├── lib.rs │ ├── transaction_stream.rs │ └── utils │ ├── additional_headers.rs │ ├── mod.rs │ └── time.rs ├── examples ├── .cargo │ └── config.toml ├── Cargo.lock ├── Cargo.toml ├── postgres-basic-events-example │ ├── Cargo.toml │ ├── README.md │ ├── example-config.yaml │ └── src │ │ ├── db │ │ ├── diesel.toml │ │ ├── migrations │ │ │ ├── 00000000000000_diesel_initial_setup │ │ │ │ ├── down.sql │ │ │ │ └── up.sql │ │ │ └── 2025-03-06-231718_create_events │ │ │ │ ├── down.sql │ │ │ │ └── up.sql │ │ └── schema.rs │ │ ├── events_model.rs │ │ └── main.rs └── rustfmt.toml └── scripts ├── check_banned_deps.sh └── rust_lint.sh /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @aptos-labs/ecosystem-infra @rtso -------------------------------------------------------------------------------- /.github/workflows/lint.yaml: -------------------------------------------------------------------------------- 1 | name: "Lint" 2 | on: 3 | # Allow us to run this specific workflow without a PR 4 | workflow_dispatch: 5 | pull_request: 6 | push: 7 | branches: 8 | 
- main 9 | 10 | # cancel redundant builds 11 | concurrency: 12 | # for push and workflow_dispatch events we use `github.sha` in the concurrency group and don't really cancel each other out/limit concurrency 13 | # for pull_request events newer jobs cancel earlier jobs to save on CI etc. 14 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.sha || github.head_ref || github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | Lint: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - name: Install Dependencies (aptos-indexer-processors-sdkl) 24 | run: | 25 | sudo apt update && sudo apt install libdw-dev 26 | cargo install cargo-sort 27 | rustup update 28 | rustup toolchain install nightly 29 | rustup component add clippy --toolchain nightly 30 | rustup component add rustfmt --toolchain nightly 31 | working-directory: aptos-indexer-processors-sdk 32 | 33 | - name: Install Dependencies (examples) 34 | run: | 35 | sudo apt update && sudo apt install libdw-dev 36 | cargo install cargo-sort 37 | rustup update 38 | rustup toolchain install nightly 39 | rustup component add clippy --toolchain nightly 40 | rustup component add rustfmt --toolchain nightly 41 | working-directory: examples 42 | 43 | - name: Run Linter 44 | run: | 45 | bash scripts/rust_lint.sh --check 46 | 47 | - name: Check Banned Dependencies 48 | run: bash scripts/check_banned_deps.sh 49 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: "Create Release Tag" 2 | on: 3 | workflow_dispatch: 4 | inputs: 5 | release_type: 6 | description: 'Type of release (patch/minor)' 7 | required: true 8 | type: choice 9 | options: 10 | - 'release patch' 11 | - 'release minor' 12 | 13 | jobs: 14 | create-tag: 15 | runs-on: ubuntu-latest 16 | permissions: 17 | contents: write 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | fetch-depth: 0 # Fetch all history for all tags and branches 22 | 23 | - name: Determine Next Version 24 | id: next-version 25 | run: | 26 | # Get the latest tag that matches our pattern 27 | latest_tag=$(git tag -l "aptos-indexer-sdk-v*" | sort -V | tail -n 1) 28 | 29 | if [ -z "$latest_tag" ]; then 30 | # If no existing tag, start with 1.0.0 31 | echo "next_tag=aptos-indexer-sdk-v1.0.0" >> $GITHUB_OUTPUT 32 | exit 0 33 | fi 34 | 35 | # Extract version numbers 36 | version=$(echo $latest_tag | sed 's/aptos-indexer-sdk-v//') 37 | major=$(echo $version | cut -d. -f1) 38 | minor=$(echo $version | cut -d. -f2) 39 | patch=$(echo $version | cut -d. 
-f3) 40 | 41 | if [ "${{ github.event.inputs.release_type }}" = "release patch" ]; then 42 | # Increment patch version 43 | new_version="${major}.${minor}.$((patch + 1))" 44 | else 45 | # Increment minor version, reset patch to 0 46 | new_version="${major}.$((minor + 1)).0" 47 | fi 48 | 49 | echo "next_tag=aptos-indexer-sdk-v${new_version}" >> $GITHUB_OUTPUT 50 | echo "Current version: ${latest_tag}" 51 | echo "Next version will be: aptos-indexer-sdk-v${new_version}" 52 | 53 | - name: Create and Push Tag 54 | run: | 55 | git tag ${{ steps.next-version.outputs.next_tag }} 56 | git push origin ${{ steps.next-version.outputs.next_tag }} 57 | 58 | - name: Create Release 59 | uses: softprops/action-gh-release@v1 60 | with: 61 | tag_name: ${{ steps.next-version.outputs.next_tag }} 62 | name: ${{ steps.next-version.outputs.next_tag }} 63 | generate_release_notes: true -------------------------------------------------------------------------------- /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: "Tests" 2 | on: 3 | workflow_dispatch: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.event_name }}-${{ (github.event_name == 'push' || github.event_name == 'workflow_dispatch') && github.sha || github.head_ref || github.ref }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | Test: 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | 19 | - name: Install Dependencies 20 | run: | 21 | sudo apt update && sudo apt install libdw-dev 22 | cargo install cargo-sort 23 | rustup update 24 | rustup toolchain install nightly 25 | working-directory: aptos-indexer-processors-sdk 26 | 27 | - name: Build with No Default Features 28 | run: cargo build --no-default-features 29 | working-directory: aptos-indexer-processors-sdk 30 | 31 | - name: Run Tests 32 | id: tests 33 | continue-on-error: true # Allow workflow to continue if tests fail 34 | run: cargo test 35 | working-directory: aptos-indexer-processors-sdk 36 | 37 | - name: Notify Eco Infra Oncall about proto update failure 38 | if: | 39 | steps.tests.outcome == 'failure' && 40 | github.event_name == 'pull_request' && 41 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 42 | uses: slackapi/slack-github-action@v1.24.0 43 | with: 44 | # eco-infra-oncall channel. 45 | channel-id: 'C0468USBLQJ' 46 | slack-message: | 47 | :warning: Tests failed on PR with indexer-sdk-update label 48 | PR: ${{ github.event.pull_request.html_url }} 49 | Author: ${{ github.event.pull_request.user.login }} 50 | Title: ${{ github.event.pull_request.title }} 51 | env: 52 | SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} 53 | -------------------------------------------------------------------------------- /.github/workflows/update-processor-sdk-version-legacy.yaml: -------------------------------------------------------------------------------- 1 | name: Update Processor SDK Version 2 | 'on': 3 | pull_request: 4 | types: [labeled, opened, synchronize, reopened, auto_merge_enabled] 5 | branches: 6 | - main 7 | paths: 8 | # Be conservative and only run this workflow when the Cargo.toml file changes. 9 | # Reason: if SDK version is not updated, no change will be picked up by processors. 
10 | - aptos-indexer-processors-sdk/Cargo.toml 11 | 12 | permissions: 13 | contents: read 14 | id-token: write 15 | 16 | jobs: 17 | update-processor-sdk-version: 18 | runs-on: ubuntu-latest 19 | # Only run on each PR once an appropriate event occurs 20 | if: | 21 | ( 22 | github.event_name == 'push' || 23 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 24 | ) 25 | steps: 26 | - id: auth 27 | uses: "google-github-actions/auth@v2" 28 | with: 29 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 30 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 31 | - name: Get Secret Manager Secrets 32 | id: secrets 33 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 34 | with: 35 | secrets: |- 36 | token:aptos-ci/github-actions-repository-dispatch 37 | - name: Checkout code 38 | uses: actions/checkout@v4 39 | with: 40 | token: ${{ steps.secrets.outputs.token }} 41 | - name: Capture the commit hash 42 | id: commit_hash 43 | run: | 44 | # Echo the commit hash to the output 45 | echo "::set-output name=commit_hash::$(echo $GITHUB_SHA)" 46 | # Echo the PR branch name to the output 47 | echo "::set-output name=branch_name::${{ github.head_ref }}" 48 | - name: Setup Rust 49 | uses: actions-rust-lang/setup-rust-toolchain@v1 50 | - name: Install toml 51 | run: cargo install toml-cli 52 | - name: Capture aptos-protos commit hash 53 | id: aptos_protos_commit_hash 54 | run: | 55 | cd aptos-indexer-processors-sdk 56 | aptos_protos_commit_hash=$(toml get Cargo.toml workspace.dependencies.aptos-protos.rev) 57 | echo "::set-output name=aptos_protos_commit_hash::${aptos_protos_commit_hash}" 58 | - name: Dispatch Event to processors Repo 59 | uses: peter-evans/repository-dispatch@v3.0.0 60 | with: 61 | token: ${{ steps.secrets.outputs.token }} 62 | repository: 'aptos-labs/aptos-indexer-processors' 63 | event-type: 'sdk-dependency-update' 64 | client-payload: '{"commit_hash": "${{ github.sha }}", "branch_name": "${{ steps.commit_hash.outputs.branch_name }}", "aptos_protos_commit_hash": ${{ steps.aptos_protos_commit_hash.outputs.aptos_protos_commit_hash }}}' 65 | -------------------------------------------------------------------------------- /.github/workflows/update-processor-sdk-version.yaml: -------------------------------------------------------------------------------- 1 | name: Update Processor SDK Version 2 | 'on': 3 | pull_request: 4 | types: [labeled, opened, synchronize, reopened, auto_merge_enabled] 5 | branches: 6 | - main 7 | paths: 8 | # Be conservative and only run this workflow when the Cargo.toml file changes. 9 | # Reason: if SDK version is not updated, no change will be picked up by processors. 
10 | - aptos-indexer-processors-sdk/Cargo.toml 11 | 12 | permissions: 13 | contents: read 14 | id-token: write 15 | 16 | jobs: 17 | update-processor-sdk-version: 18 | runs-on: ubuntu-latest 19 | # Only run on each PR once an appropriate event occurs 20 | if: | 21 | ( 22 | github.event_name == 'push' || 23 | contains(github.event.pull_request.labels.*.name, 'indexer-sdk-update') 24 | ) 25 | steps: 26 | - id: auth 27 | uses: "google-github-actions/auth@v2" 28 | with: 29 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 30 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 31 | - name: Get Secret Manager Secrets 32 | id: secrets 33 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 34 | with: 35 | secrets: |- 36 | token:aptos-ci/github-actions-repository-dispatch 37 | - name: Checkout code 38 | uses: actions/checkout@v4 39 | with: 40 | token: ${{ steps.secrets.outputs.token }} 41 | - name: Capture the commit hash 42 | id: commit_hash 43 | run: | 44 | # Echo the commit hash to the output 45 | echo "::set-output name=commit_hash::$(echo $GITHUB_SHA)" 46 | # Echo the PR branch name to the output 47 | echo "::set-output name=branch_name::${{ github.head_ref }}" 48 | - name: Setup Rust 49 | uses: actions-rust-lang/setup-rust-toolchain@v1 50 | - name: Install toml 51 | run: cargo install toml-cli 52 | - name: Capture aptos-protos commit hash 53 | id: aptos_protos_commit_hash 54 | run: | 55 | cd aptos-indexer-processors-sdk 56 | aptos_protos_commit_hash=$(toml get Cargo.toml workspace.dependencies.aptos-protos.rev) 57 | echo "::set-output name=aptos_protos_commit_hash::${aptos_protos_commit_hash}" 58 | - name: Dispatch Event to processors Repo 59 | uses: peter-evans/repository-dispatch@v3.0.0 60 | with: 61 | token: ${{ steps.secrets.outputs.token }} 62 | repository: 'aptos-labs/aptos-indexer-processors-v2' 63 | event-type: 'sdk-dependency-update' 64 | client-payload: '{"commit_hash": "${{ github.sha }}", "branch_name": "${{ steps.commit_hash.outputs.branch_name }}", "aptos_protos_commit_hash": ${{ steps.aptos_protos_commit_hash.outputs.aptos_protos_commit_hash }}}' 65 | -------------------------------------------------------------------------------- /.github/workflows/update-proto-dependency.yaml: -------------------------------------------------------------------------------- 1 | name: Update Proto Dependency 2 | 3 | on: 4 | repository_dispatch: 5 | types: [proto-dependency-update] 6 | workflow_dispatch: 7 | inputs: 8 | commit_hash: 9 | description: 'Commit hash to update proto to' 10 | required: true 11 | branch_name: 12 | description: 'Branch name (without -update-aptos-protos suffix)' 13 | required: true 14 | default: 'main' 15 | 16 | permissions: 17 | contents: write 18 | pull-requests: write 19 | id-token: write 20 | 21 | jobs: 22 | update-the-dependency: 23 | runs-on: ubuntu-latest 24 | steps: 25 | - id: auth 26 | uses: "google-github-actions/auth@v2" 27 | with: 28 | workload_identity_provider: ${{ secrets.GCP_WORKLOAD_IDENTITY_PROVIDER }} 29 | service_account: ${{ secrets.GCP_SERVICE_ACCOUNT_EMAIL }} 30 | - name: Get Secret Manager Secrets 31 | id: secrets 32 | uses: 'google-github-actions/get-secretmanager-secrets@v2' 33 | with: 34 | secrets: |- 35 | token:aptos-ci/github-actions-repository-dispatch 36 | - name: Configure Git user 37 | run: | 38 | git config --global user.name "Aptos Bot" 39 | git config --global user.email "aptos-bot@aptoslabs.com" 40 | - name: Checkout 41 | uses: actions/checkout@v4 42 | with: 43 | token: ${{ 
steps.secrets.outputs.token }} 44 | - name: Setup Rust 45 | uses: actions-rust-lang/setup-rust-toolchain@v1 46 | 47 | - name: Install toml 48 | run: cargo install toml-cli 49 | 50 | - name: Update the dependency 51 | run: | 52 | set -e 53 | toml set Cargo.toml workspace.dependencies.aptos-protos.rev ${{ github.event.inputs.commit_hash || github.event.client_payload.commit_hash }} > Cargo.tmp && mv Cargo.tmp Cargo.toml 54 | working-directory: aptos-indexer-processors-sdk/ 55 | 56 | - name: Commit and Push Changes 57 | run: | 58 | set -e 59 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 60 | git checkout -b "$branch_name" 61 | git add Cargo.toml 62 | git commit -m "Update aptos-protos to ${{ github.event.inputs.commit_hash || github.event.client_payload.commit_hash }}" 63 | git push origin "$branch_name" --force 64 | env: 65 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 66 | working-directory: aptos-indexer-processors-sdk/ 67 | 68 | - name: Check if PR Already Exists 69 | id: check_pr 70 | run: | 71 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 72 | existing_pr=$(gh pr list --base main --head "$branch_name" --json number --jq '.[].number') 73 | if [ -n "$existing_pr" ]; then 74 | echo "::set-output name=if_pr_exists::true" 75 | else 76 | echo "::set-output name=if_pr_exists::false" 77 | fi 78 | env: 79 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 80 | - name: Create Pull Request 81 | if: steps.check_pr.outputs.if_pr_exists == 'false' 82 | run: | 83 | branch_name="${{ github.event.inputs.branch_name || github.event.client_payload.branch_name }}-update-aptos-protos" 84 | gh pr create --title "Update aptos-protos to upstream branch ${{ github.event.client_payload.branch_name }}" \ 85 | --body "This PR updates aptos-protos to a new version." \ 86 | --base main \ 87 | --head "$branch_name" \ 88 | --label "indexer-sdk-update" 89 | env: 90 | GITHUB_TOKEN: ${{ steps.secrets.outputs.token }} 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | 3 | # Rust specific ignores 4 | # Please follow https://help.github.com/en/articles/ignoring-files to create a global 5 | # .gitignore file locally for IDE/Emacs/Vim generated files. 6 | **/target 7 | **/*.rs.bk 8 | .idea/ 9 | 10 | # macOS Specific ignores 11 | # General 12 | .DS_Store 13 | .AppleDouble 14 | .LSOverride 15 | 16 | # VSCode settings 17 | .vscode/ 18 | 19 | # Processor config 20 | config.yaml -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Aptos Indexer SDK 2 | Generally, an indexer processor follows this flow: 3 | 4 | 1. Receive a stream of Aptos transactions 5 | 2. Extract data from the transactions 6 | 3. Transform and merge the parsed data into a coherent, standardized schema 7 | 4. Store the transformed data into a database 8 | 9 | The Aptos Indexer SDK works by modeling each processor as a graph of independent steps. Each of the steps in the flow above is written as a `Step` in the SDK, and the output of each `Step` is connected to the input of the next `Step` by a channel. 
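For orientation, here is a minimal sketch of what such a graph can look like once assembled. The `MyExtractorStep` and `MyStorerStep` types, the `transaction_stream_config` and `db_pool` values, and the exact `TransactionStreamStep` constructor shown here are illustrative placeholders; the trait implementations and builder calls they rely on are described in the sections below.

```rust
// 1. A step that receives a stream of Aptos transactions (provided by the SDK).
//    Constructor shown schematically; see the SDK docs for the real signature.
let transaction_stream = TransactionStreamStep::new(transaction_stream_config).await?;
// 2 & 3. A hypothetical step that extracts and transforms data from each transaction batch.
let extractor = MyExtractorStep::new();
// 4. A hypothetical step that writes the transformed data to a database.
let storer = MyStorerStep::new(db_pool);

// Each `connect_to` call creates a channel: the output of one step becomes the input of the next.
let (_builder, output_receiver) = ProcessorBuilder::new_with_inputless_first_step(
    transaction_stream.into_runnable_step(),
)
.connect_to(extractor.into_runnable_step(), 10)
.connect_to(storer.into_runnable_step(), 10)
.end_and_return_output_receiver(10);
```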
10 | 11 | # How to use 12 | 13 | To your `Cargo.toml`, add 14 | 15 | ```toml 16 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}" } 17 | aptos-indexer-processor-sdk-server-framework = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}" } 18 | ``` 19 | 20 | # Get started 21 | 22 | We’ve created a [Quickstart Guide to Aptos Indexer SDK](https://github.com/aptos-labs/aptos-indexer-processor-example) which gets you set up and running an events processor that indexes events on the Aptos blockchain. 23 | 24 | # Documentation 25 | 26 | ## Creating a step 27 | 28 | To create a step in the SDK, implement these traits: 29 | 30 | 1. **Processable** 31 | 32 | ```rust 33 | #[async_trait] 34 | impl Processable for MyExtractorStep { 35 | type Input = Transaction; 36 | type Output = ExtractedDataModel; 37 | type RunType = AsyncRunType; 38 | 39 | // Processes a batch of input items and returns a batch of output items. 40 | async fn process( 41 | &mut self, 42 | input: TransactionContext<Transaction>, 43 | ) -> Result<Option<TransactionContext<ExtractedDataModel>>, ProcessorError> { 44 | let extracted_data = ... 45 | // Extract data from input.data 46 | 47 | Ok(Some(TransactionContext { 48 | data: extracted_data, 49 | start_version: input.start_version, 50 | end_version: input.end_version, 51 | start_transaction_timestamp: input.start_transaction_timestamp, 52 | end_transaction_timestamp: input.end_transaction_timestamp, 53 | total_size_in_bytes: input.total_size_in_bytes, 54 | })) 55 | } 56 | } 57 | ``` 58 | 59 | 2. **NamedStep** 60 | 61 | ```rust 62 | impl NamedStep for MyExtractorStep { 63 | fn name(&self) -> String { 64 | "MyExtractorStep".to_string() 65 | } 66 | } 67 | ``` 68 | 69 | 3. Either **AsyncStep** or **PollableAsyncStep**, which defines how the step will be run in the processor. 70 | 1. The most basic step is an `AsyncStep`, which processes a batch of input items and returns a batch of output items. 71 | 72 | ```rust 73 | #[async_trait] 74 | impl Processable for MyExtractorStep { 75 | ... 76 | type RunType = AsyncRunType; 77 | ... 78 | } 79 | 80 | impl AsyncStep for MyExtractorStep {} 81 | ``` 82 | 83 | 2. A `PollableAsyncStep` does the same as `AsyncStep`, but it also periodically polls its internal state and returns a batch of output items if available. 84 | 85 | ```rust 86 | #[async_trait] 87 | impl<T> Processable for MyPollStep<T> 88 | where 89 | Self: Sized + Send + 'static, 90 | T: Send + 'static, 91 | { 92 | ... 93 | type RunType = PollableAsyncRunType; 94 | ... 95 | } 96 | 97 | #[async_trait] 98 | impl<T> PollableAsyncStep for MyPollStep<T> 99 | where 100 | Self: Sized + Send + Sync + 'static, 101 | T: Send + 'static, 102 | { 103 | /// Returns the duration between polls 104 | fn poll_interval(&self) -> std::time::Duration { 105 | // Define duration 106 | } 107 | 108 | /// Polls the internal state and returns a batch of output items if available. 109 | async fn poll(&mut self) -> Result<Option<Vec<TransactionContext<Self::Output>>>, ProcessorError> { 110 | // Define polling logic 111 | } 112 | } 113 | ``` 114 | 115 | 116 | ## Common steps 117 | 118 | The SDK provides several common steps to use in your processor. 119 | 120 | 1. `TransactionStreamStep` provides a stream of Aptos transactions to the processor 121 | 2. 
`TimedBufferStep` buffers a batch of items and periodically polls to release the items to the next step 122 | 123 | ## Connecting steps 124 | 125 | When `ProcessorBuilder` connects two steps, a channel is created linking the two steps and the output of the first step becomes the input of the next step. 126 | 127 | ```rust 128 | let (pb, buffer_receiver) = ProcessorBuilder::new_with_inputless_first_step( 129 | first_step.into_runnable_step(), 130 | ) 131 | .connect_to(second_step.into_runnable_step(), channel_size) 132 | .connect_to(third_step.into_runnable_step(), channel_size) 133 | .end_and_return_output_receiver(channel_size); 134 | ``` 135 | 136 | ## Adding a new processor 137 | 138 | 1. Use [aptos-indexer-processor-example](https://github.com/aptos-labs/aptos-indexer-processor-example) as a starting point 139 | 2. Add the new processor to [ProcessorConfig](https://github.com/aptos-labs/aptos-indexer-processor-example/blob/a8bbb23056d55b86b4ded6822c9120e5e8763d50/aptos-indexer-processor-example/src/config/processor_config.rs#L34) and [Processor](https://github.com/aptos-labs/aptos-indexer-processor-example/blob/a8bbb23056d55b86b4ded6822c9120e5e8763d50/aptos-indexer-processor-example/src/config/processor_config.rs#L58) 140 | 3. Add the processor to [RunnableConfig](https://github.com/aptos-labs/aptos-indexer-processor-example/blob/a8bbb23056d55b86b4ded6822c9120e5e8763d50/aptos-indexer-processor-example/src/config/indexer_processor_config.rs#L25) 141 | 142 | ## Running a processor 143 | 144 | To run the processor, we recommend using the example in [aptos-indexer-processor-example](https://github.com/aptos-labs/aptos-indexer-processor-example) and following this [configuration guide](https://github.com/aptos-labs/aptos-indexer-processor-example?tab=readme-ov-file#configuring-your-processor). 145 | 146 | ## Advanced features (experimental) 147 | 148 | 1. Fanout + ArcifyStep 149 | 2. Fan in 150 | 151 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xclippy = [ 3 | "clippy", 4 | "--workspace", 5 | "--all-targets", 6 | "--all-features", 7 | "--", 8 | "-Dwarnings", 9 | "-Wclippy::all", 10 | "-Aclippy::upper_case_acronyms", 11 | "-Aclippy::enum-variant-names", 12 | "-Aclippy::result-large-err", 13 | "-Aclippy::mutable-key-type", 14 | "-Aclippy::map_identity", # We temporarily ignore this due to: https://github.com/rust-lang/rust-clippy/issues/11764 15 | ] 16 | 17 | [build] 18 | rustflags = [ 19 | "--cfg", 20 | "tokio_unstable", 21 | "-C", 22 | "force-frame-pointers=yes", 23 | "-C", 24 | "force-unwind-tables=yes", 25 | ] 26 | 27 | # TODO(grao): Figure out whether we should enable other cpu features, and whether we should use a different way to configure them rather than list every single one here. 
28 | #[target.x86_64-unknown-linux-gnu] 29 | #rustflags = ["--cfg", "tokio_unstable", "-C", "link-arg=-fuse-ld=lld", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "target-feature=+sse4.2"] 30 | 31 | # 64 bit MSVC 32 | #[target.x86_64-pc-windows-msvc] 33 | #rustflags = ["--cfg", "tokio_unstable", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "link-arg=/STACK:8000000" # Set stack to 8 MB] 34 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | 4 | members = [ 5 | "instrumented-channel", 6 | "moving-average", 7 | "sample", 8 | "sdk", 9 | "transaction-stream", 10 | ] 11 | 12 | [workspace.package] 13 | authors = ["Aptos Labs "] 14 | edition = "2021" 15 | homepage = "https://aptoslabs.com" 16 | license = "Apache-2.0" 17 | publish = false 18 | repository = "https://github.com/aptos-labs/aptos-indexer-processor-sdk" 19 | rust-version = "1.78" 20 | 21 | [workspace.dependencies] 22 | aptos-indexer-processor-sdk = { path = "sdk" } 23 | aptos-indexer-transaction-stream = { path = "transaction-stream" } 24 | instrumented-channel = { path = "instrumented-channel" } 25 | aptos-moving-average = { path = "moving-average" } 26 | sample = { path = "sample" } 27 | 28 | ahash = { version = "0.8.7", features = ["serde"] } 29 | anyhow = "1.0.98" 30 | aptos-protos = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2dd9c73b27fdcbe78c7391fd43c9a5d00b93e686" } 31 | aptos-system-utils = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2dd9c73b27fdcbe78c7391fd43c9a5d00b93e686" } 32 | aptos-transaction-filter = { git = "https://github.com/aptos-labs/aptos-core.git", rev = "2dd9c73b27fdcbe78c7391fd43c9a5d00b93e686" } 33 | async-trait = "0.1.80" 34 | autometrics = { version = "1.0.1", features = ["prometheus-exporter"] } 35 | axum = "0.7.5" 36 | backtrace = "0.3.58" 37 | bcs = { git = "https://github.com/aptos-labs/bcs.git", rev = "d31fab9d81748e2594be5cd5cdf845786a30562d" } 38 | bigdecimal = { version = "0.4.0", features = ["serde"] } 39 | chrono = { version = "0.4.19", features = ["clock", "serde"] } 40 | clap = { version = "4.3.5", features = ["derive", "unstable-styles"] } 41 | # Do NOT enable the postgres feature here, it is conditionally enabled in a feature 42 | # block in the Cargo.toml file for the processor crate. 
43 | # https://github.com/aptos-labs/aptos-indexer-processors/pull/325 44 | diesel = { version = "=2.2.0", features = [ 45 | "chrono", 46 | "postgres_backend", 47 | "numeric", 48 | "serde_json", 49 | ] } 50 | # Use the crate version once this feature gets released on crates.io: 51 | # https://github.com/weiznich/diesel_async/commit/e165e8c96a6c540ebde2d6d7c52df5c5620a4bf1 52 | diesel-async = { git = "https://github.com/weiznich/diesel_async.git", rev = "e3beac66cd41ab53d78a10328bb72f272103e5d1", features = [ 53 | "async-connection-wrapper", 54 | "postgres", 55 | "bb8", 56 | "tokio", 57 | ] } 58 | diesel_migrations = { version = "2.1.0", features = ["postgres"] } 59 | delegate = "0.12.0" 60 | derive_builder = "0.20.0" 61 | field_count = "0.1.1" 62 | futures = "0.3.30" 63 | futures-util = "0.3.21" 64 | hex = "0.4.3" 65 | indexmap = { version = "2.7.0", features = ["serde"] } 66 | itertools = "0.13.0" 67 | 68 | # Locking this because newer versions of kanal are using the unstable feature error_in_core, which 69 | # will break the Aptos CLI. 70 | kanal = "=0.1.0-pre8" 71 | lazy_static = "1.4.0" 72 | mockall = "0.12.1" 73 | num_cpus = "1.16.0" 74 | once_cell = { version = "1.19.0" } 75 | petgraph = "0.6.5" 76 | prometheus = "0.13.3" 77 | prometheus-client = "0.22.2" 78 | prost = { version = "0.13.4", features = ["no-recursion-limit"] } 79 | rayon = "1.10.0" 80 | serde = { version = "1.0.193", features = ["derive", "rc"] } 81 | serde_json = { version = "1.0.81", features = ["preserve_order"] } 82 | serde_yaml = "0.8.24" 83 | sha2 = "0.9.3" 84 | strum = { version = "0.24.1", features = ["derive"] } 85 | tempfile = "3.3.0" 86 | testcontainers = "0.20.1" 87 | thiserror = "1.0.61" 88 | tiny-keccak = { version = "2.0.2", features = ["keccak", "sha3"] } 89 | tracing = "0.1.34" 90 | tokio = { version = "1.37.0", features = ["full"] } 91 | tokio-retry = { version = "0.3.0" } 92 | toml = "0.7.4" 93 | tonic = { version = "0.12.3", features = [ 94 | "tls", 95 | "tls-roots", 96 | "transport", 97 | "prost", 98 | "codegen", 99 | "zstd", 100 | ] } 101 | tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] } 102 | url = { version = "2.5.1", features = ["serde"] } 103 | 104 | # Postgres SSL support 105 | native-tls = "0.2.11" 106 | postgres-native-tls = "0.5.0" 107 | tokio-postgres = "0.7.10" 108 | tokio-stream = { version = "0.1.16", features = ["net"] } 109 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "instrumented-channel" 7 | version = "0.1.0" 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "instrumented-channel" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | delegate = { workspace = true } 16 | derive_builder = { workspace = true } 17 | kanal = { workspace = true } 18 | once_cell = { workspace = true } 19 | prometheus = { workspace = true } 20 | prometheus-client = { workspace = true } 21 | 22 | [dev-dependencies] 23 | tokio = { workspace = true } 24 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/src/channel_metrics.rs: -------------------------------------------------------------------------------- 1 | use derive_builder::Builder; 2 | use once_cell::sync::Lazy; 3 | use prometheus_client::{ 4 | encoding::EncodeLabelSet, 5 | metrics::{counter::Counter, family::Family, gauge::Gauge}, 6 | registry::Registry, 7 | }; 8 | use std::sync::atomic::AtomicU64; 9 | 10 | pub const METRICS_PREFIX: &str = "aptos_procsdk_channel_"; 11 | 12 | pub fn init_channel_metrics_registry(registry: &mut Registry) { 13 | registry.register( 14 | format!("{}_{}", METRICS_PREFIX, "sent_messages_count"), 15 | "Number of messages sent", 16 | SENT_MESSAGES_COUNT.clone(), 17 | ); 18 | 19 | registry.register( 20 | format!("{}_{}", METRICS_PREFIX, "received_messages_count"), 21 | "Number of messages received", 22 | RECEIVED_MESSAGES_COUNT.clone(), 23 | ); 24 | 25 | registry.register( 26 | format!("{}_{}", METRICS_PREFIX, "send_duration"), 27 | "Duration in seconds to send a message", 28 | SEND_DURATION.clone(), 29 | ); 30 | 31 | registry.register( 32 | format!("{}_{}", METRICS_PREFIX, "receive_duration"), 33 | "Duration in seconds to receive a message", 34 | RECEIVE_DURATION.clone(), 35 | ); 36 | 37 | registry.register( 38 | format!("{}_{}", METRICS_PREFIX, "failed_sends_count"), 39 | "Number of failed sends", 40 | FAILED_SENDS_COUNT.clone(), 41 | ); 42 | 43 | registry.register( 44 | format!("{}_{}", METRICS_PREFIX, "failed_receives_count"), 45 | "Number of failed receives", 46 | FAILED_RECEIVES_COUNT.clone(), 47 | ); 48 | 49 | registry.register( 50 | format!("{}_{}", METRICS_PREFIX, "channel_size"), 51 | "Size of the channel", 52 | CHANNEL_SIZE.clone(), 53 | ); 54 | } 55 | 56 | #[derive(Clone, Debug, Hash, PartialEq, Eq, EncodeLabelSet)] 57 | pub struct ChannelMetricLabels { 58 | pub output_of: String, 59 | } 60 | 61 | pub static SENT_MESSAGES_COUNT: Lazy> = 62 | Lazy::new(Family::::default); 63 | 64 | pub static RECEIVED_MESSAGES_COUNT: Lazy> = 65 | Lazy::new(Family::::default); 66 | 67 | pub static SEND_DURATION: Lazy>> = 68 | Lazy::new(Family::>::default); 69 | 70 | pub static RECEIVE_DURATION: Lazy>> = 71 | Lazy::new(Family::>::default); 72 | 73 | pub static FAILED_SENDS_COUNT: Lazy> = 74 | Lazy::new(Family::::default); 75 | 76 | pub static FAILED_RECEIVES_COUNT: Lazy> = 77 | Lazy::new(Family::::default); 78 | 79 | pub static CHANNEL_SIZE: Lazy> = 80 | Lazy::new(Family::::default); 81 | 82 
| #[derive(Builder, Clone)] 83 | pub struct ChannelMetrics { 84 | pub labels: ChannelMetricLabels, 85 | } 86 | 87 | impl ChannelMetrics { 88 | pub fn new(output_of: String) -> Self { 89 | Self { 90 | labels: ChannelMetricLabels { output_of }, 91 | } 92 | } 93 | } 94 | 95 | impl ChannelMetrics { 96 | pub fn inc_sent_messages_count(&self) -> &Self { 97 | SENT_MESSAGES_COUNT.get_or_create(&self.labels).inc(); 98 | self 99 | } 100 | 101 | pub fn inc_received_messages_count(&self) -> &Self { 102 | RECEIVED_MESSAGES_COUNT.get_or_create(&self.labels).inc(); 103 | self 104 | } 105 | 106 | pub fn inc_failed_sends_count(&self) -> &Self { 107 | FAILED_SENDS_COUNT.get_or_create(&self.labels).inc(); 108 | self 109 | } 110 | 111 | pub fn inc_failed_receives_count(&self) -> &Self { 112 | FAILED_RECEIVES_COUNT.get_or_create(&self.labels).inc(); 113 | self 114 | } 115 | 116 | pub fn log_send_duration(&self, duration: f64) -> &Self { 117 | SEND_DURATION.get_or_create(&self.labels).set(duration); 118 | self 119 | } 120 | 121 | pub fn log_receive_duration(&self, duration: f64) -> &Self { 122 | RECEIVE_DURATION.get_or_create(&self.labels).set(duration); 123 | self 124 | } 125 | 126 | pub fn log_channel_size(&self, size: u64) -> &Self { 127 | CHANNEL_SIZE.get_or_create(&self.labels).set(size as i64); 128 | self 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/instrumented-channel/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod channel_metrics; 2 | 3 | use channel_metrics::ChannelMetrics; 4 | use delegate::delegate; 5 | /** 6 | 7 | # Instrumented Channel 8 | This is a wrapper and abstraction over the kanal channel (for now), but it can be extended to support other channels as well. 9 | 10 | The main purpose of this crate is to provide a way to instrument the channel, so that we can track the number of messages sent and received, and the time taken to send and receive messages. 11 | 12 | ## Example 13 | ```rust 14 | use instrumented_channel::instrumented_bounded_channel; 15 | use tokio::time::{sleep, Duration}; 16 | 17 | #[tokio::main] 18 | async fn main() { 19 | let (sender, receiver) = instrumented_bounded_channel("channel_name", 10); 20 | sender.send(42).await.unwrap(); 21 | assert_eq!(receiver.recv().await.unwrap(), 42); 22 | } 23 | ``` 24 | **/ 25 | use kanal::{AsyncReceiver, AsyncSender, ReceiveError, SendError}; 26 | 27 | pub struct InstrumentedAsyncSender { 28 | pub(crate) sender: AsyncSender, 29 | // Metrics 30 | pub(crate) channel_metrics: channel_metrics::ChannelMetrics, 31 | } 32 | 33 | impl InstrumentedAsyncSender { 34 | // shared_send_impl methods 35 | delegate! 
{ 36 | to self.sender { 37 | pub fn is_disconnected(&self) -> bool; 38 | pub fn len(&self) -> usize; 39 | pub fn is_empty(&self) -> bool; 40 | pub fn is_full(&self) -> bool; 41 | pub fn capacity(&self); 42 | pub fn receiver_count(&self) -> u32; 43 | pub fn sender_count(&self) -> u32; 44 | pub fn close(&self) -> bool; 45 | pub fn is_closed(&self) -> bool; 46 | } 47 | } 48 | 49 | pub fn new(sender: AsyncSender, output_of: &str) -> Self { 50 | let channel_metrics = ChannelMetrics::new(output_of.to_string()); 51 | 52 | Self { 53 | sender, 54 | channel_metrics, 55 | } 56 | } 57 | 58 | pub async fn send(&'_ self, data: T) -> Result<(), SendError> { 59 | let send_start = std::time::Instant::now(); 60 | let res = self.sender.send(data).await; 61 | let send_duration = send_start.elapsed(); 62 | 63 | if res.is_err() { 64 | self.channel_metrics 65 | .log_send_duration(send_duration.as_secs_f64()) 66 | .log_channel_size(self.sender.len() as u64) 67 | .inc_failed_sends_count(); 68 | } else { 69 | self.channel_metrics 70 | .log_send_duration(send_duration.as_secs_f64()) 71 | .log_channel_size(self.sender.len() as u64) 72 | .inc_sent_messages_count(); 73 | } 74 | 75 | res 76 | } 77 | } 78 | 79 | impl Clone for InstrumentedAsyncSender { 80 | fn clone(&self) -> Self { 81 | Self { 82 | sender: self.sender.clone(), 83 | channel_metrics: self.channel_metrics.clone(), 84 | } 85 | } 86 | } 87 | 88 | pub struct InstrumentedAsyncReceiver { 89 | pub(crate) receiver: AsyncReceiver, 90 | // Metrics 91 | pub(crate) channel_metrics: ChannelMetrics, 92 | } 93 | 94 | impl InstrumentedAsyncReceiver { 95 | // shared_recv_impl methods 96 | delegate! { 97 | to self.receiver { 98 | pub fn is_disconnected(&self) -> bool; 99 | pub fn len(&self) -> usize; 100 | pub fn is_empty(&self) -> bool; 101 | pub fn is_full(&self) -> bool; 102 | pub fn capacity(&self); 103 | pub fn receiver_count(&self) -> u32; 104 | pub fn sender_count(&self) -> u32; 105 | pub fn close(&self) -> bool; 106 | pub fn is_closed(&self) -> bool; 107 | } 108 | } 109 | 110 | pub fn new(receiver: AsyncReceiver, output_of: &str) -> Self { 111 | let channel_metrics = ChannelMetrics::new(output_of.to_string()); 112 | Self { 113 | receiver, 114 | channel_metrics, 115 | } 116 | } 117 | 118 | pub async fn recv(&'_ self) -> Result { 119 | let receive_start = std::time::Instant::now(); 120 | let result = self.receiver.recv().await; 121 | let receive_duration = receive_start.elapsed(); 122 | 123 | if result.is_err() { 124 | self.channel_metrics 125 | .log_receive_duration(receive_duration.as_secs_f64()) 126 | .log_channel_size(self.receiver.len() as u64) 127 | .inc_failed_receives_count(); 128 | } else { 129 | self.channel_metrics 130 | .log_receive_duration(receive_duration.as_secs_f64()) 131 | .log_channel_size(self.receiver.len() as u64) 132 | .inc_received_messages_count(); 133 | } 134 | 135 | result 136 | } 137 | } 138 | 139 | impl Clone for InstrumentedAsyncReceiver { 140 | fn clone(&self) -> Self { 141 | Self { 142 | receiver: self.receiver.clone(), 143 | channel_metrics: self.channel_metrics.clone(), 144 | } 145 | } 146 | } 147 | 148 | pub fn instrumented_bounded_channel( 149 | output_of: &str, 150 | size: usize, 151 | ) -> (InstrumentedAsyncSender, InstrumentedAsyncReceiver) { 152 | let (sender, receiver) = kanal::bounded_async(size); 153 | ( 154 | InstrumentedAsyncSender::new(sender, output_of), 155 | InstrumentedAsyncReceiver::new(receiver, output_of), 156 | ) 157 | } 158 | 159 | pub fn instrumented_unbounded_channel( 160 | output_of: &str, 161 | ) -> 
(InstrumentedAsyncSender, InstrumentedAsyncReceiver) { 162 | let (sender, receiver) = kanal::unbounded_async(); 163 | ( 164 | InstrumentedAsyncSender::new(sender, output_of), 165 | InstrumentedAsyncReceiver::new(receiver, output_of), 166 | ) 167 | } 168 | 169 | #[cfg(test)] 170 | mod tests { 171 | use super::*; 172 | use prometheus::Encoder; 173 | 174 | fn gather_metrics_to_string() -> String { 175 | let metrics = prometheus::gather(); 176 | let mut buffer = vec![]; 177 | let encoder = prometheus::TextEncoder::new(); 178 | encoder.encode(&metrics, &mut buffer).unwrap(); 179 | String::from_utf8(buffer).unwrap() 180 | } 181 | #[tokio::test] 182 | #[allow(clippy::needless_return)] 183 | async fn test_instrumented_channel() { 184 | let (sender, receiver) = instrumented_bounded_channel("my_channel", 10); 185 | sender.send(42).await.unwrap(); 186 | sender.send(999).await.unwrap(); 187 | sender.send(3).await.unwrap(); 188 | assert_eq!(receiver.recv().await.unwrap(), 42); 189 | // TODO: check prometheus metrics 190 | let metrics = gather_metrics_to_string(); 191 | println!("{metrics}"); 192 | } 193 | } 194 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/moving-average/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-moving-average" 3 | description = "Utility to calculate moving average such as TPS" 4 | version = "0.1.0" 5 | 6 | # Workspace inherited keys 7 | authors = { workspace = true } 8 | edition = { workspace = true } 9 | homepage = { workspace = true } 10 | license = { workspace = true } 11 | publish = { workspace = true } 12 | repository = { workspace = true } 13 | rust-version = { workspace = true } 14 | 15 | [dependencies] 16 | chrono = { workspace = true } 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/moving-average/src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![forbid(unsafe_code)] 5 | 6 | use std::collections::VecDeque; 7 | 8 | // TPS data 9 | pub struct MovingAverage { 10 | window_millis: u64, 11 | // (timestamp_millis, value) 12 | values: VecDeque<(u64, u64)>, 13 | sum: u64, 14 | } 15 | 16 | impl MovingAverage { 17 | pub fn new(window_millis: u64) -> Self { 18 | let now = chrono::Utc::now().naive_utc().and_utc().timestamp_millis() as u64; 19 | let mut queue = VecDeque::new(); 20 | queue.push_back((now, 0)); 21 | Self { 22 | window_millis, 23 | values: queue, 24 | sum: 0, 25 | } 26 | } 27 | 28 | pub fn tick_now(&mut self, value: u64) { 29 | let now = chrono::Utc::now().naive_utc().and_utc().timestamp_millis() as u64; 30 | self.tick(now, value); 31 | } 32 | 33 | pub fn tick(&mut self, timestamp_millis: u64, value: u64) -> f64 { 34 | self.values.push_back((timestamp_millis, value)); 35 | self.sum += value; 36 | while self.values.len() > 2 { 37 | match self.values.front() { 38 | None => break, 39 | Some((ts, val)) => { 40 | if timestamp_millis - ts > self.window_millis { 41 | self.sum -= val; 42 | self.values.pop_front(); 43 | } else { 44 | break; 45 | } 46 | }, 47 | } 48 | } 49 | self.avg() 50 | } 51 | 52 | // Only be called after tick_now/tick is called. 
53 | pub fn avg(&self) -> f64 { 54 | if self.values.len() < 2 { 55 | 0.0 56 | } else { 57 | let elapsed = self.values.back().unwrap().0 - self.values.front().unwrap().0; 58 | (self.sum * 1000) as f64 / elapsed as f64 59 | } 60 | } 61 | 62 | pub fn sum(&self) -> u64 { 63 | self.sum 64 | } 65 | } 66 | 67 | #[cfg(test)] 68 | mod test { 69 | use super::*; 70 | 71 | #[test] 72 | fn test_moving_average() { 73 | // 10 Second window. 74 | let mut ma = MovingAverage::new(10_000); 75 | // 9 seconds spent at 100 TPS. 76 | for _ in 0..9 { 77 | ma.tick_now(100); 78 | std::thread::sleep(std::time::Duration::from_secs(1)); 79 | } 80 | // No matter what algorithm we use, the average should be 99 at least. 81 | let avg = ma.avg(); 82 | assert!(avg >= 99.0, "Average is too low: {avg}"); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/rustfmt.toml: -------------------------------------------------------------------------------- 1 | combine_control_expr = false 2 | edition = "2021" 3 | imports_granularity = "Crate" 4 | format_macro_matchers = true 5 | group_imports = "One" 6 | hex_literal_case = "Upper" 7 | match_block_trailing_comma = true 8 | newline_style = "Unix" 9 | overflow_delimited_expr = true 10 | reorder_impl_items = true 11 | use_field_init_shorthand = true 12 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sample/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sample" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | tracing = { workspace = true } 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sample/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | sync::atomic::{AtomicU64, Ordering}, 3 | time::{Duration, SystemTime}, 4 | }; 5 | 6 | /// ## Sampling logs 7 | /// 8 | /// Sometimes logging a large amount of data is expensive. In order to log information only part 9 | /// of the time, we've added a `sample!` macro that's configurable on how often we want to execute some code. 10 | /// 11 | /// `SampleRate` determines how often the sampled statement will occur. 12 | /// 13 | /// ``` 14 | /// use sample::{sample, SampleRate, Sampling}; 15 | /// use std::time::Duration; 16 | /// use tracing::info; 17 | /// 18 | /// // Sampled based on frequency of events, log only every 2 logs 19 | /// sample!(SampleRate::Frequency(2), info!("Long log")); 20 | /// 21 | /// // Sampled based on time passed, log at most once a minute 22 | /// sample!(SampleRate::Duration(Duration::from_secs(60)), info!("Long log")); 23 | /// ``` 24 | /// The rate at which a `sample!` macro will run it's given function 25 | #[derive(Debug)] 26 | pub enum SampleRate { 27 | /// Only sample a single time during a window of time. This rate only has a resolution in 28 | /// seconds. 29 | Duration(Duration), 30 | /// Sample based on the frequency of the event. 
The provided u64 is the inverse of the 31 | /// frequency (1/x), for example Frequency(2) means that 1 out of every 2 events will be 32 | /// sampled (1/2). 33 | Frequency(u64), 34 | /// Always Sample 35 | Always, 36 | } 37 | 38 | /// An internal struct that can be checked if a sample is ready for the `sample!` macro 39 | pub struct Sampling { 40 | rate: SampleRate, 41 | state: AtomicU64, 42 | } 43 | 44 | impl Sampling { 45 | pub const fn new(rate: SampleRate) -> Self { 46 | Self { 47 | rate, 48 | state: AtomicU64::new(0), 49 | } 50 | } 51 | 52 | pub fn sample(&self) -> bool { 53 | match &self.rate { 54 | SampleRate::Duration(rate) => Self::sample_duration(rate, &self.state), 55 | SampleRate::Frequency(rate) => Self::sample_frequency(*rate, &self.state), 56 | SampleRate::Always => true, 57 | } 58 | } 59 | 60 | fn sample_frequency(rate: u64, count: &AtomicU64) -> bool { 61 | let previous_count = count 62 | .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |count| { 63 | let new_count = if count == 0 { 64 | rate.saturating_sub(1) 65 | } else { 66 | count.saturating_sub(1) 67 | }; 68 | Some(new_count) 69 | }) 70 | .expect("Closure should always returns 'Some'. This is a Bug."); 71 | 72 | previous_count == 0 73 | } 74 | 75 | fn sample_duration(rate: &Duration, last_sample: &AtomicU64) -> bool { 76 | let rate = rate.as_secs(); 77 | // Seconds since Unix Epoch 78 | let now = SystemTime::now() 79 | .duration_since(SystemTime::UNIX_EPOCH) 80 | .expect("SystemTime before UNIX EPOCH!") 81 | .as_secs(); 82 | 83 | last_sample 84 | .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |last_sample| { 85 | if now.saturating_sub(last_sample) >= rate { 86 | Some(now) 87 | } else { 88 | None 89 | } 90 | }) 91 | .is_ok() 92 | } 93 | } 94 | 95 | /// Samples a given function at a `SampleRate`, useful for periodically emitting logs or metrics on 96 | /// high throughput pieces of code. 97 | #[macro_export] 98 | macro_rules! 
sample { 99 | ($sample_rate:expr, $($args:expr)+ ,) => { 100 | $crate::sample!($sample_rate, $($args)+); 101 | }; 102 | 103 | ($sample_rate:expr, $($args:tt)+) => {{ 104 | static SAMPLING: $crate::Sampling = $crate::Sampling::new($sample_rate); 105 | if SAMPLING.sample() { 106 | $($args)+ 107 | } 108 | }}; 109 | } 110 | 111 | #[cfg(test)] 112 | mod tests { 113 | use super::*; 114 | 115 | #[test] 116 | fn frequency() { 117 | // Frequency 118 | let sampling = Sampling::new(SampleRate::Frequency(10)); 119 | let mut v = Vec::new(); 120 | for i in 0..=25 { 121 | if sampling.sample() { 122 | v.push(i); 123 | } 124 | } 125 | 126 | assert_eq!(v, vec![0, 10, 20]); 127 | } 128 | 129 | #[test] 130 | fn always() { 131 | // Always 132 | let sampling = Sampling::new(SampleRate::Always); 133 | let mut v = Vec::new(); 134 | for i in 0..5 { 135 | if sampling.sample() { 136 | v.push(i); 137 | } 138 | } 139 | 140 | assert_eq!(v, vec![0, 1, 2, 3, 4]); 141 | } 142 | 143 | #[ignore] 144 | #[test] 145 | fn duration() { 146 | // Duration 147 | let sampling = Sampling::new(SampleRate::Duration(Duration::from_secs(1))); 148 | let mut v = Vec::new(); 149 | for i in 0..5 { 150 | if sampling.sample() { 151 | v.push(i); 152 | } 153 | 154 | std::thread::sleep(Duration::from_millis(500)); 155 | } 156 | 157 | assert_eq!(v.len(), 2); 158 | } 159 | 160 | #[test] 161 | fn macro_expansion() { 162 | for i in 0..10 { 163 | sample!( 164 | SampleRate::Frequency(2), 165 | println!("loooooooooooooooooooooooooong hello {}", i), 166 | ); 167 | 168 | sample!(SampleRate::Frequency(2), { 169 | println!("hello {i}"); 170 | }); 171 | 172 | sample!(SampleRate::Frequency(2), println!("hello {i}")); 173 | 174 | sample! { 175 | SampleRate::Frequency(2), 176 | 177 | for j in 10..20 { 178 | println!("hello {j}"); 179 | } 180 | } 181 | } 182 | } 183 | 184 | #[test] 185 | fn threaded() { 186 | fn work() -> usize { 187 | let mut count = 0; 188 | 189 | for _ in 0..1000 { 190 | sample!(SampleRate::Frequency(5), count += 1); 191 | } 192 | 193 | count 194 | } 195 | 196 | let mut handles = Vec::new(); 197 | for _ in 0..10 { 198 | handles.push(std::thread::spawn(work)); 199 | } 200 | 201 | let mut count = 0; 202 | for handle in handles { 203 | count += handle.join().unwrap(); 204 | } 205 | 206 | assert_eq!(count, 2000); 207 | } 208 | } 209 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-indexer-processor-sdk" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | ahash = { workspace = true } 16 | anyhow = { workspace = true } 17 | aptos-indexer-transaction-stream = { workspace = true } 18 | aptos-protos = { workspace = true } 19 | async-trait = { workspace = true } 20 | autometrics = { workspace = true } 21 | axum = { workspace = true } 22 | backtrace = { workspace = true } 23 | bcs = { workspace = true } 24 | bigdecimal = { workspace = true } 25 | chrono = { workspace = true } 26 | clap = { workspace = true } 27 | derive_builder = { workspace = true } 28 | diesel = { workspace = true, optional = true } 29 | diesel-async = { workspace = true, optional = true } 30 | 
diesel_migrations = { workspace = true, optional = true } 31 | field_count = { workspace = true, optional = true } 32 | futures = { workspace = true } 33 | futures-util = { workspace = true } 34 | hex = { workspace = true } 35 | indexmap = { workspace = true } 36 | instrumented-channel = { workspace = true } 37 | kanal = { workspace = true } 38 | mockall = { workspace = true } 39 | native-tls = { workspace = true, optional = true } 40 | num_cpus = { workspace = true } 41 | once_cell = { workspace = true } 42 | petgraph = { workspace = true } 43 | postgres-native-tls = { workspace = true, optional = true } 44 | prometheus = { workspace = true } 45 | prometheus-client = { workspace = true } 46 | serde = { workspace = true } 47 | serde_json = { workspace = true } 48 | serde_yaml = { workspace = true } 49 | sha2 = { workspace = true } 50 | tempfile = { workspace = true } 51 | testcontainers = { workspace = true, optional = true } 52 | thiserror = { workspace = true } 53 | tiny-keccak = { workspace = true } 54 | tokio = { workspace = true } 55 | tokio-postgres = { workspace = true, optional = true } 56 | tokio-retry = { workspace = true, optional = true } 57 | tokio-stream = { workspace = true, optional = true } 58 | toml = { workspace = true } 59 | tonic = { workspace = true, optional = true } 60 | tracing = { workspace = true } 61 | tracing-subscriber = { workspace = true } 62 | url = { workspace = true } 63 | 64 | [target.'cfg(target_os = "linux")'.dependencies] 65 | aptos-system-utils = { workspace = true } 66 | 67 | [features] 68 | postgres_partial = [ 69 | "diesel", 70 | "diesel-async", 71 | "diesel_migrations", 72 | "field_count", 73 | "postgres-native-tls", 74 | "native-tls", 75 | "tokio-postgres", 76 | ] 77 | # When using the postgres_full features we enable the diesel/postgres feature. We configure 78 | # it in a feature so the CLI can opt out, since it cannot tolerate the libpq dep. 79 | # Recall that features should always be additive. 80 | postgres_full = ["postgres_partial", "diesel/postgres"] 81 | testing_framework = ["testcontainers", "tonic", "tokio-retry", "tokio-stream"] 82 | default = [] 83 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/builder/dag.rs: -------------------------------------------------------------------------------- 1 | use crate::traits::{RunnableStep, RunnableStepWithInputReceiver}; 2 | use tokio::task::JoinHandle; 3 | 4 | pub fn connect_two_steps( 5 | left_step: RunnableStepWithInputReceiver, 6 | right_step: RightStep, 7 | channel_size: usize, 8 | ) -> ( 9 | JoinHandle<()>, 10 | RunnableStepWithInputReceiver, 11 | ) 12 | where 13 | LeftInput: Send + 'static, 14 | LeftOutput: Send + 'static, 15 | RightOutput: Send + 'static, 16 | LeftStep: RunnableStep + Send + Sized + 'static, 17 | RightStep: RunnableStep + Send + Sized + 'static, 18 | { 19 | let RunnableStepWithInputReceiver { 20 | input_receiver: left_input_receiver, 21 | _input_sender: _left_input_sender, 22 | step: left_step, 23 | .. 
24 | } = left_step; 25 | 26 | let (left_output_receiver, left_handle) = left_step.spawn( 27 | Some(left_input_receiver.clone()), 28 | channel_size, 29 | _left_input_sender, 30 | ); 31 | 32 | let right_step_with_input_receiver = 33 | RunnableStepWithInputReceiver::new(left_output_receiver, right_step); 34 | 35 | (left_handle, right_step_with_input_receiver) 36 | } 37 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/builder/mod.rs: -------------------------------------------------------------------------------- 1 | mod dag; 2 | mod processor_builder; 3 | 4 | pub use processor_builder::ProcessorBuilder; 5 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/arcify_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{async_step::AsyncRunType, AsyncStep, NamedStep, Processable}, 3 | types::transaction_context::TransactionContext, 4 | utils::errors::ProcessorError, 5 | }; 6 | use std::{marker::PhantomData, sync::Arc}; 7 | 8 | pub struct ArcifyStep 9 | where 10 | Self: Sized + Send + 'static, 11 | { 12 | _marker: PhantomData, 13 | } 14 | 15 | impl ArcifyStep { 16 | pub fn new() -> Self { 17 | Self { 18 | _marker: PhantomData, 19 | } 20 | } 21 | } 22 | 23 | impl Default for ArcifyStep { 24 | fn default() -> Self { 25 | Self::new() 26 | } 27 | } 28 | 29 | #[async_trait::async_trait] 30 | impl Processable for ArcifyStep 31 | where 32 | T: Send + Sync + 'static, 33 | { 34 | type Input = Vec; 35 | type Output = Vec>; 36 | type RunType = AsyncRunType; 37 | 38 | async fn process( 39 | &mut self, 40 | item: TransactionContext>, 41 | ) -> Result>>>, ProcessorError> { 42 | Ok(Some(TransactionContext { 43 | data: item.data.into_iter().map(Arc::new).collect(), 44 | metadata: item.metadata, 45 | })) 46 | } 47 | } 48 | 49 | impl AsyncStep for ArcifyStep where T: Send + Sync + 'static {} 50 | 51 | impl NamedStep for ArcifyStep 52 | where 53 | T: Send + Sync + 'static, 54 | { 55 | fn name(&self) -> String { 56 | format!("Arcify<{}>", std::any::type_name::()) 57 | } 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | use crate::types::transaction_context::TransactionMetadata; 64 | 65 | fn generate_transaction_context() -> TransactionContext> { 66 | TransactionContext { 67 | data: vec![1, 2, 3], 68 | metadata: TransactionMetadata { 69 | start_version: 0, 70 | end_version: 0, 71 | start_transaction_timestamp: None, 72 | end_transaction_timestamp: None, 73 | total_size_in_bytes: 0, 74 | }, 75 | } 76 | } 77 | 78 | #[tokio::test] 79 | #[allow(clippy::needless_return)] 80 | async fn test_arcify_step_process() { 81 | let mut step = ArcifyStep::::new(); 82 | let input = generate_transaction_context(); 83 | 84 | let result = step.process(input).await.unwrap().unwrap(); 85 | assert_eq!(result.data.len(), 3); 86 | assert_eq!(*result.data[0], 1); 87 | assert_eq!(*result.data[1], 2); 88 | assert_eq!(*result.data[2], 3); 89 | } 90 | 91 | #[tokio::test] 92 | #[allow(clippy::needless_return)] 93 | async fn test_arcify_strong_count() { 94 | let mut step = ArcifyStep::::new(); 95 | let input = generate_transaction_context(); 96 | 97 | let result = step.process(input).await.unwrap().unwrap(); 98 | assert_eq!(Arc::strong_count(&result.data[0]), 1); 99 | 100 | let arc_clone = result.data[0].clone(); 101 | assert_eq!(Arc::strong_count(&arc_clone), 2); 102 | 103 | drop(arc_clone); 104 | 
assert_eq!(Arc::strong_count(&result.data[0]), 1); 105 | } 106 | 107 | #[tokio::test] 108 | #[allow(clippy::needless_return)] 109 | async fn test_arcify_ptr_eq() { 110 | let mut step = ArcifyStep::::new(); 111 | let input = generate_transaction_context(); 112 | 113 | let result = step.process(input).await.unwrap().unwrap(); 114 | let arc_clone = result.data[0].clone(); 115 | assert!(Arc::ptr_eq(&result.data[0], &arc_clone)); 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod arcify_step; 2 | pub mod order_by_version_step; 3 | pub mod timed_buffer_step; 4 | pub mod transaction_stream_step; 5 | pub mod version_tracker_step; 6 | pub mod write_rate_limit_step; 7 | 8 | // Re-export the steps 9 | pub use arcify_step::ArcifyStep; 10 | pub use order_by_version_step::OrderByVersionStep; 11 | pub use timed_buffer_step::TimedBufferStep; 12 | pub use transaction_stream_step::TransactionStreamStep; 13 | pub use version_tracker_step::{ 14 | ProcessorStatusSaver, VersionTrackerStep, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS, 15 | }; 16 | pub use write_rate_limit_step::{Sizeable, WriteRateLimitConfig, WriteRateLimitStep}; 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/order_by_version_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use ahash::AHashMap; 9 | use anyhow::Result; 10 | use async_trait::async_trait; 11 | use std::time::Duration; 12 | 13 | /// OrderByVersionStep is a step that orders TransactionContexts by their starting versions. 14 | /// It buffers ordered TransactionContexts and releases them at every poll_interval. 
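/// /// For example (an illustrative scenario mirroring the unit test below): if a batch covering versions 100..=199 arrives before the batch covering versions 0..=99, the step parks the 100..=199 batch in `unordered_versions` until the 0..=99 batch fills the gap, then releases both batches in version order on the next poll.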
15 | pub struct OrderByVersionStep 16 | where 17 | Self: Sized + Send + 'static, 18 | Input: Send + 'static, 19 | { 20 | pub ordered_versions: Vec>, 21 | pub unordered_versions: AHashMap>, 22 | pub expected_next_version: u64, 23 | // Duration to poll and return the ordered versions 24 | pub poll_interval: Duration, 25 | } 26 | 27 | impl OrderByVersionStep 28 | where 29 | Self: Sized + Send + 'static, 30 | Input: Send + 'static, 31 | { 32 | pub fn new(starting_version: u64, poll_interval: Duration) -> Self { 33 | Self { 34 | ordered_versions: Vec::new(), 35 | unordered_versions: AHashMap::new(), 36 | expected_next_version: starting_version, 37 | poll_interval, 38 | } 39 | } 40 | 41 | fn update_ordered_versions(&mut self) { 42 | // While there are batches in unordered_versions that are in order, add them to ordered_versions 43 | while let Some(batch) = self 44 | .unordered_versions 45 | .remove(&(self.expected_next_version)) 46 | { 47 | self.expected_next_version = batch.metadata.end_version + 1; 48 | self.ordered_versions.push(batch); 49 | } 50 | } 51 | } 52 | 53 | #[async_trait] 54 | impl Processable for OrderByVersionStep 55 | where 56 | Input: Send + Sync + 'static, 57 | { 58 | type Input = Input; 59 | type Output = Input; 60 | type RunType = PollableAsyncRunType; 61 | 62 | async fn process( 63 | &mut self, 64 | current_batch: TransactionContext, 65 | ) -> Result>, ProcessorError> { 66 | // If there's a gap in the expected_next_version and current_version 67 | // have the current_version to unordered_versions for later processing. 68 | if self.expected_next_version != current_batch.metadata.start_version { 69 | tracing::debug!( 70 | next_version = self.expected_next_version, 71 | step = self.name(), 72 | "Gap detected starting from version: {}", 73 | current_batch.metadata.start_version 74 | ); 75 | self.unordered_versions 76 | .insert(current_batch.metadata.start_version, current_batch); 77 | } else { 78 | tracing::debug!("No gap detected"); 79 | self.expected_next_version = current_batch.metadata.end_version + 1; 80 | self.ordered_versions.push(current_batch); 81 | 82 | // If the current_versions is the expected_next_version, update the ordered_versions 83 | self.update_ordered_versions(); 84 | } 85 | 86 | // TODO: Consider adding a metric for the number of unordered_versions for debugging, performance tesing 87 | 88 | // Pass through 89 | Ok(None) // No immediate output 90 | } 91 | 92 | // Once polling ends, release the remaining ordered items in buffer 93 | async fn cleanup( 94 | &mut self, 95 | ) -> Result>>, ProcessorError> { 96 | Ok(Some(std::mem::take(&mut self.ordered_versions))) 97 | } 98 | } 99 | 100 | #[async_trait] 101 | impl PollableAsyncStep for OrderByVersionStep { 102 | fn poll_interval(&self) -> Duration { 103 | self.poll_interval 104 | } 105 | 106 | async fn poll(&mut self) -> Result>>, ProcessorError> { 107 | Ok(Some(std::mem::take(&mut self.ordered_versions))) 108 | } 109 | } 110 | 111 | impl NamedStep for OrderByVersionStep { 112 | // TODO: oncecell this somehow? Likely in wrapper struct... 
113 | fn name(&self) -> String { 114 | format!("OrderByVersionStep: {}", std::any::type_name::()) 115 | } 116 | } 117 | 118 | #[cfg(test)] 119 | mod tests { 120 | use super::*; 121 | use crate::{ 122 | builder::ProcessorBuilder, 123 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 124 | traits::{IntoRunnableStep, RunnableStepWithInputReceiver}, 125 | types::transaction_context::TransactionMetadata, 126 | }; 127 | use instrumented_channel::instrumented_bounded_channel; 128 | 129 | fn generate_unordered_transaction_contexts() -> Vec> { 130 | vec![ 131 | TransactionContext { 132 | data: (), 133 | metadata: TransactionMetadata { 134 | start_version: 100, 135 | end_version: 199, 136 | start_transaction_timestamp: None, 137 | end_transaction_timestamp: None, 138 | total_size_in_bytes: 0, 139 | }, 140 | }, 141 | TransactionContext { 142 | data: (), 143 | metadata: TransactionMetadata { 144 | start_version: 0, 145 | end_version: 99, 146 | start_transaction_timestamp: None, 147 | end_transaction_timestamp: None, 148 | total_size_in_bytes: 0, 149 | }, 150 | }, 151 | ] 152 | } 153 | 154 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 155 | #[allow(clippy::needless_return)] 156 | async fn test_order_step() { 157 | // Setup 158 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 159 | let input_step = RunnableStepWithInputReceiver::new( 160 | input_receiver, 161 | PassThroughStep::default().into_runnable_step(), 162 | ); 163 | let order_step = OrderByVersionStep::<()>::new(0, Duration::from_millis(250)); 164 | 165 | let (_pb, mut output_receiver) = 166 | ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 167 | .connect_to(order_step.into_runnable_step(), 5) 168 | .end_and_return_output_receiver(5); 169 | 170 | let unordered_transaction_contexts = generate_unordered_transaction_contexts(); 171 | let mut ordered_transaction_contexts = unordered_transaction_contexts.clone(); 172 | ordered_transaction_contexts.sort(); 173 | 174 | for transaction_context in unordered_transaction_contexts { 175 | input_sender.send(transaction_context).await.unwrap(); 176 | } 177 | tokio::time::sleep(Duration::from_millis(500)).await; 178 | 179 | for ordered_transaction_context in ordered_transaction_contexts { 180 | let result = receive_with_timeout(&mut output_receiver, 100) 181 | .await 182 | .unwrap(); 183 | assert_eq!( 184 | result.metadata.start_version, 185 | ordered_transaction_context.metadata.start_version 186 | ); 187 | } 188 | } 189 | } 190 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/timed_buffer_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use anyhow::Result; 9 | use async_trait::async_trait; 10 | use std::time::Duration; 11 | 12 | pub struct TimedBufferStep 13 | where 14 | Self: Sized + Send + 'static, 15 | Input: Send + 'static, 16 | { 17 | pub internal_buffer: Vec>, 18 | pub poll_interval: Duration, 19 | } 20 | 21 | impl TimedBufferStep 22 | where 23 | Self: Sized + Send + 'static, 24 | Input: Send + 'static, 25 | { 26 | #[allow(dead_code)] 27 | pub fn new(poll_interval: Duration) -> Self { 28 | Self { 29 | internal_buffer: Vec::new(), 30 | poll_interval, 31 
| } 32 | } 33 | } 34 | 35 | #[async_trait] 36 | impl Processable for TimedBufferStep 37 | where 38 | Input: Send + Sync + 'static, 39 | { 40 | type Input = Input; 41 | type Output = Input; 42 | type RunType = PollableAsyncRunType; 43 | 44 | async fn process( 45 | &mut self, 46 | item: TransactionContext, 47 | ) -> Result>, ProcessorError> { 48 | self.internal_buffer.push(item); 49 | Ok(None) // No immediate output 50 | } 51 | 52 | // Once polling ends, release the remaining items in buffer 53 | async fn cleanup( 54 | &mut self, 55 | ) -> Result>>, ProcessorError> { 56 | Ok(Some(std::mem::take(&mut self.internal_buffer))) 57 | } 58 | } 59 | 60 | #[async_trait] 61 | impl PollableAsyncStep for TimedBufferStep { 62 | fn poll_interval(&self) -> Duration { 63 | self.poll_interval 64 | } 65 | 66 | async fn poll(&mut self) -> Result>>, ProcessorError> { 67 | Ok(Some(std::mem::take(&mut self.internal_buffer))) 68 | } 69 | } 70 | 71 | impl NamedStep for TimedBufferStep { 72 | // TODO: oncecell this somehow? Likely in wrapper struct... 73 | fn name(&self) -> String { 74 | format!("TimedBuffer: {}", std::any::type_name::()) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/transaction_stream_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{NamedStep, PollableAsyncRunType, PollableAsyncStep, Processable}, 3 | types::transaction_context::{TransactionContext, TransactionMetadata}, 4 | utils::errors::ProcessorError, 5 | }; 6 | use anyhow::Result; 7 | use aptos_indexer_transaction_stream::{ 8 | TransactionStream as TransactionStreamInternal, TransactionStreamConfig, 9 | }; 10 | use aptos_protos::transaction::v1::Transaction; 11 | use async_trait::async_trait; 12 | use mockall::mock; 13 | use std::time::Duration; 14 | use tokio::sync::Mutex; 15 | use tracing::{error, info, warn}; 16 | 17 | // TransactionStreamStep establishes a gRPC connection with Transaction Stream, 18 | // fetches transactions, and outputs them for processing. It also handles reconnections with retries. 19 | // This is usually the initial step in a processor.
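// // A minimal wiring sketch (illustrative only; the full, working version is in `basic_processor_function.rs` later in this crate). It assumes a `transaction_stream_config: TransactionStreamConfig` has already been loaded from the processor config: // //     let transaction_stream = TransactionStreamStep::new(transaction_stream_config).await?; //     let builder = ProcessorBuilder::new_with_inputless_first_step( //         transaction_stream.into_runnable_step(), //     ); //     // ...then chain `.connect_to(...)` for downstream steps and finish with //     // `.end_and_return_output_receiver(...)`.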
20 | pub struct TransactionStreamStep 21 | where 22 | Self: Sized + Send + 'static, 23 | { 24 | transaction_stream_config: TransactionStreamConfig, 25 | pub transaction_stream: Mutex, 26 | } 27 | 28 | impl TransactionStreamStep 29 | where 30 | Self: Sized + Send + 'static, 31 | { 32 | pub async fn new( 33 | transaction_stream_config: TransactionStreamConfig, 34 | ) -> Result { 35 | let transaction_stream_res = 36 | TransactionStreamInternal::new(transaction_stream_config.clone()).await; 37 | match transaction_stream_res { 38 | Err(e) => Err(ProcessorError::StepInitError { 39 | message: format!("Error creating transaction stream: {e:?}"), 40 | }), 41 | Ok(transaction_stream) => Ok(Self { 42 | transaction_stream: Mutex::new(transaction_stream), 43 | transaction_stream_config, 44 | }), 45 | } 46 | } 47 | } 48 | 49 | #[async_trait] 50 | impl Processable for TransactionStreamStep 51 | where 52 | Self: Sized + Send + 'static, 53 | { 54 | type Input = (); 55 | // The TransactionStreamStep will output a batch of transactions for processing 56 | type Output = Vec; 57 | type RunType = PollableAsyncRunType; 58 | 59 | async fn process( 60 | &mut self, 61 | _item: TransactionContext<()>, 62 | ) -> Result>>, ProcessorError> { 63 | Ok(None) 64 | } 65 | } 66 | 67 | #[async_trait] 68 | impl PollableAsyncStep for TransactionStreamStep 69 | where 70 | Self: Sized + Send + Sync + 'static, 71 | { 72 | fn poll_interval(&self) -> std::time::Duration { 73 | Duration::from_secs(0) 74 | } 75 | 76 | async fn poll( 77 | &mut self, 78 | ) -> Result>>>, ProcessorError> { 79 | let txn_pb_response_res = self 80 | .transaction_stream 81 | .lock() 82 | .await 83 | .get_next_transaction_batch() 84 | .await; 85 | match txn_pb_response_res { 86 | Ok(txn_pb_response) => { 87 | let transactions_with_context = TransactionContext { 88 | data: txn_pb_response.transactions, 89 | metadata: TransactionMetadata { 90 | start_version: txn_pb_response.start_version, 91 | end_version: txn_pb_response.end_version, 92 | start_transaction_timestamp: txn_pb_response.start_txn_timestamp, 93 | end_transaction_timestamp: txn_pb_response.end_txn_timestamp, 94 | total_size_in_bytes: txn_pb_response.size_in_bytes, 95 | }, 96 | }; 97 | Ok(Some(vec![transactions_with_context])) 98 | }, 99 | Err(e) => { 100 | warn!( 101 | stream_address = self.transaction_stream_config.indexer_grpc_data_service_address.to_string(), 102 | error = ?e, 103 | "Error fetching transactions from TransactionStream. Attempting to reconnect." 104 | ); 105 | 106 | // TransactionStream closes connections every 5 minutes. We should try to reconnect 107 | match self 108 | .transaction_stream 109 | .lock() 110 | .await 111 | .reconnect_to_grpc_with_retries() 112 | .await 113 | { 114 | Ok(_) => { 115 | info!( 116 | stream_address = self 117 | .transaction_stream_config 118 | .indexer_grpc_data_service_address 119 | .to_string(), 120 | "Successfully reconnected to TransactionStream." 121 | ); 122 | // Return nothing for now. The next poll will fetch the next batch of transactions. 123 | Ok(None) 124 | }, 125 | Err(e) => { 126 | error!( 127 | stream_address = self.transaction_stream_config 128 | .indexer_grpc_data_service_address 129 | .to_string(), 130 | error = ?e, 131 | " Error reconnecting transaction stream." 
132 | ); 133 | Err(ProcessorError::PollError { 134 | message: format!("Error reconnecting to TransactionStream: {e:?}"), 135 | }) 136 | }, 137 | } 138 | }, 139 | } 140 | } 141 | 142 | async fn should_continue_polling(&mut self) -> bool { 143 | let is_end = self.transaction_stream.lock().await.is_end_of_stream(); 144 | if is_end { 145 | info!("Reached ending version"); 146 | } 147 | !is_end 148 | } 149 | } 150 | 151 | impl NamedStep for TransactionStreamStep { 152 | fn name(&self) -> String { 153 | "TransactionStreamStep".to_string() 154 | } 155 | } 156 | 157 | mock! { 158 | pub TransactionStreamStep {} 159 | 160 | #[async_trait] 161 | impl Processable for TransactionStreamStep 162 | where Self: Sized + Send + 'static, 163 | { 164 | type Input = (); 165 | type Output = Vec; 166 | type RunType = PollableAsyncRunType; 167 | 168 | async fn init(&mut self); 169 | 170 | async fn process(&mut self, _item: TransactionContext<()> ) -> Result>>, ProcessorError>; 171 | } 172 | 173 | #[async_trait] 174 | impl PollableAsyncStep for TransactionStreamStep 175 | where 176 | Self: Sized + Send + 'static, 177 | { 178 | fn poll_interval(&self) -> std::time::Duration; 179 | 180 | // async fn poll(&mut self) -> Option> { 181 | // // Testing framework can provide mocked transactions here 182 | // Some(vec![TransactionsPBResponse { 183 | // transactions: vec![], 184 | // chain_id: 0, 185 | // start_version: 0, 186 | // end_version: 100, 187 | // start_txn_timestamp: None, 188 | // end_txn_timestamp: None, 189 | // size_in_bytes: 10, 190 | // }]) 191 | // } 192 | async fn poll(&mut self) -> Result>>>, ProcessorError>; 193 | 194 | async fn should_continue_polling(&mut self) -> bool; 195 | } 196 | 197 | impl NamedStep for TransactionStreamStep { 198 | fn name(&self) -> String; 199 | } 200 | } 201 | 202 | #[cfg(test)] 203 | mod tests { 204 | use super::*; 205 | use crate::{ 206 | builder::ProcessorBuilder, 207 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 208 | traits::IntoRunnableStep, 209 | types::transaction_context::TransactionMetadata, 210 | }; 211 | use mockall::Sequence; 212 | use std::time::Duration; 213 | 214 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 215 | #[allow(clippy::needless_return)] 216 | async fn test_transaction_stream() { 217 | let mut mock_transaction_stream = MockTransactionStreamStep::new(); 218 | // Testing framework can provide mocked transactions here 219 | mock_transaction_stream.expect_poll().returning(|| { 220 | Ok(Some(vec![TransactionContext { 221 | data: vec![Transaction::default()], 222 | metadata: TransactionMetadata { 223 | start_version: 0, 224 | end_version: 100, 225 | start_transaction_timestamp: None, 226 | end_transaction_timestamp: None, 227 | total_size_in_bytes: 10, 228 | }, 229 | }])) 230 | }); 231 | mock_transaction_stream 232 | .expect_poll_interval() 233 | .returning(|| Duration::from_secs(0)); 234 | mock_transaction_stream.expect_init().returning(|| { 235 | // Do nothing 236 | }); 237 | mock_transaction_stream 238 | .expect_name() 239 | .returning(|| "MockTransactionStream".to_string()); 240 | 241 | // Set up the mock transaction stream to poll 3 times 242 | let mut seq = Sequence::new(); 243 | mock_transaction_stream 244 | .expect_should_continue_polling() 245 | .times(3) 246 | .in_sequence(&mut seq) 247 | .return_const(true); 248 | mock_transaction_stream 249 | .expect_should_continue_polling() 250 | .return_const(false); 251 | 252 | let pass_through_step = PassThroughStep::default(); 253 | 254 | let (_, mut 
output_receiver) = ProcessorBuilder::new_with_inputless_first_step( 255 | mock_transaction_stream.into_runnable_step(), 256 | ) 257 | .connect_to(pass_through_step.into_runnable_step(), 5) 258 | .end_and_return_output_receiver(5); 259 | 260 | tokio::time::sleep(Duration::from_millis(250)).await; 261 | for _ in 0..3 { 262 | let result = receive_with_timeout(&mut output_receiver, 100) 263 | .await 264 | .unwrap(); 265 | 266 | assert_eq!(result.data.len(), 1); 267 | } 268 | 269 | // After receiving 3 outputs, the channel should be empty 270 | let result = receive_with_timeout(&mut output_receiver, 100).await; 271 | assert!(result.is_none()); 272 | } 273 | } 274 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/common_steps/version_tracker_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | pollable_async_step::PollableAsyncRunType, NamedStep, PollableAsyncStep, Processable, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::errors::ProcessorError, 7 | }; 8 | use anyhow::Result; 9 | use async_trait::async_trait; 10 | use std::marker::PhantomData; 11 | 12 | pub const DEFAULT_UPDATE_PROCESSOR_STATUS_SECS: u64 = 1; 13 | 14 | /// The `ProcessorStatusSaver` trait object should be implemented in order to save the latest successfully 15 | /// processed transaction version to storage. I.e., persisting the `processor_status` to storage. 16 | #[async_trait] 17 | pub trait ProcessorStatusSaver { 18 | // T represents the transaction type that the processor is tracking. 19 | async fn save_processor_status( 20 | &self, 21 | last_success_batch: &TransactionContext<()>, 22 | ) -> Result<(), ProcessorError>; 23 | } 24 | 25 | /// Tracks the versioned processing of sequential transactions, ensuring no gaps 26 | /// occur between them. 27 | /// 28 | /// Important: this step assumes ordered transactions. Please use the `OrderByVersionStep` before this step 29 | /// if the transactions are not ordered. 30 | pub struct VersionTrackerStep 31 | where 32 | Self: Sized + Send + 'static, 33 | T: Send + 'static, 34 | S: ProcessorStatusSaver + Send + 'static, 35 | { 36 | // Last successful batch of sequentially processed transactions. Includes metadata to write to storage.
37 | last_success_batch: Option>, 38 | polling_interval_secs: u64, 39 | processor_status_saver: S, 40 | _marker: PhantomData, 41 | } 42 | 43 | impl VersionTrackerStep 44 | where 45 | Self: Sized + Send + 'static, 46 | T: Send + 'static, 47 | S: ProcessorStatusSaver + Send + 'static, 48 | { 49 | pub fn new(processor_status_saver: S, polling_interval_secs: u64) -> Self { 50 | Self { 51 | last_success_batch: None, 52 | processor_status_saver, 53 | polling_interval_secs, 54 | _marker: PhantomData, 55 | } 56 | } 57 | 58 | async fn save_processor_status(&mut self) -> Result<(), ProcessorError> { 59 | if let Some(last_success_batch) = self.last_success_batch.as_ref() { 60 | self.processor_status_saver 61 | .save_processor_status(last_success_batch) 62 | .await 63 | } else { 64 | Ok(()) 65 | } 66 | } 67 | } 68 | 69 | #[async_trait] 70 | impl Processable for VersionTrackerStep 71 | where 72 | Self: Sized + Send + 'static, 73 | T: Send + 'static, 74 | S: ProcessorStatusSaver + Send + 'static, 75 | { 76 | type Input = T; 77 | type Output = T; 78 | type RunType = PollableAsyncRunType; 79 | 80 | async fn process( 81 | &mut self, 82 | current_batch: TransactionContext, 83 | ) -> Result>, ProcessorError> { 84 | // If there's a gap in version, return an error 85 | if let Some(last_success_batch) = self.last_success_batch.as_ref() { 86 | if last_success_batch.metadata.end_version + 1 != current_batch.metadata.start_version { 87 | return Err(ProcessorError::ProcessError { 88 | message: format!( 89 | "Gap detected starting from version: {}", 90 | current_batch.metadata.start_version 91 | ), 92 | }); 93 | } 94 | } 95 | 96 | // Update the last success batch 97 | self.last_success_batch = Some(TransactionContext { 98 | data: (), 99 | metadata: current_batch.metadata.clone(), 100 | }); 101 | 102 | // Pass through 103 | Ok(Some(current_batch)) 104 | } 105 | 106 | async fn cleanup( 107 | &mut self, 108 | ) -> Result>>, ProcessorError> { 109 | // If processing or polling ends, save the last successful batch to the database. 110 | self.save_processor_status().await?; 111 | Ok(None) 112 | } 113 | } 114 | 115 | #[async_trait] 116 | impl PollableAsyncStep for VersionTrackerStep 117 | where 118 | Self: Sized + Send + Sync + 'static, 119 | T: Send + Sync + 'static, 120 | S: ProcessorStatusSaver + Send + Sync + 'static, 121 | { 122 | fn poll_interval(&self) -> std::time::Duration { 123 | std::time::Duration::from_secs(self.polling_interval_secs) 124 | } 125 | 126 | async fn poll(&mut self) -> Result>>, ProcessorError> { 127 | // TODO: Add metrics for gap count 128 | self.save_processor_status().await?; 129 | // Nothing should be returned 130 | Ok(None) 131 | } 132 | } 133 | 134 | impl NamedStep for VersionTrackerStep 135 | where 136 | Self: Sized + Send + 'static, 137 | T: Send + 'static, 138 | S: ProcessorStatusSaver + Send + 'static, 139 | { 140 | fn name(&self) -> String { 141 | format!("VersionTrackerStep: {}", std::any::type_name::()) 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod builder; 2 | pub mod common_steps; // TODO: Feature gate this? 
3 | #[cfg(feature = "postgres_partial")] 4 | pub mod postgres; 5 | pub mod server_framework; 6 | pub mod test; 7 | #[cfg(feature = "testing_framework")] 8 | pub mod testing_framework; 9 | pub mod traits; 10 | pub mod types; 11 | pub mod utils; 12 | 13 | // Re-exporting crates to provide a cohesive SDK interface 14 | pub use aptos_indexer_transaction_stream; 15 | pub use aptos_protos; 16 | pub use bcs; 17 | pub use instrumented_channel; 18 | 19 | #[cfg(test)] 20 | mod tests { 21 | use crate::{ 22 | builder::ProcessorBuilder, 23 | common_steps::TimedBufferStep, 24 | test::{steps::pass_through_step::PassThroughStep, utils::receive_with_timeout}, 25 | traits::{ 26 | AsyncStep, IntoRunnableStep, NamedStep, Processable, RunnableAsyncStep, 27 | RunnableStepWithInputReceiver, 28 | }, 29 | types::transaction_context::{TransactionContext, TransactionMetadata}, 30 | utils::errors::ProcessorError, 31 | }; 32 | use anyhow::Result; 33 | use async_trait::async_trait; 34 | use instrumented_channel::instrumented_bounded_channel; 35 | use std::time::Duration; 36 | 37 | #[derive(Clone, Debug, PartialEq)] 38 | pub struct TestStruct { 39 | pub i: usize, 40 | } 41 | 42 | fn make_test_structs(num: usize) -> Vec { 43 | (1..(num + 1)).map(|i| TestStruct { i }).collect() 44 | } 45 | 46 | pub struct TestStep; 47 | 48 | impl AsyncStep for TestStep {} 49 | 50 | impl NamedStep for TestStep { 51 | fn name(&self) -> String { 52 | "TestStep".to_string() 53 | } 54 | } 55 | 56 | #[async_trait] 57 | impl Processable for TestStep { 58 | type Input = Vec; 59 | type Output = Vec; 60 | type RunType = (); 61 | 62 | async fn process( 63 | &mut self, 64 | item: TransactionContext>, 65 | ) -> Result>>, ProcessorError> { 66 | let processed = item.data.into_iter().map(|i| TestStruct { i }).collect(); 67 | Ok(Some(TransactionContext { 68 | data: processed, 69 | metadata: item.metadata, 70 | })) 71 | } 72 | } 73 | 74 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 75 | #[allow(clippy::needless_return)] 76 | async fn test_connect_two_steps() { 77 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 78 | 79 | let input_step = RunnableStepWithInputReceiver::new( 80 | input_receiver, 81 | RunnableAsyncStep::new(PassThroughStep::default()), 82 | ); 83 | 84 | // Create a timed buffer that outputs the input after 1 second 85 | let timed_buffer_step = TimedBufferStep::>::new(Duration::from_millis(200)); 86 | let first_step = timed_buffer_step; 87 | 88 | let second_step = TestStep; 89 | let second_step = RunnableAsyncStep::new(second_step); 90 | 91 | let builder = ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 92 | .connect_to(first_step.into_runnable_step(), 5) 93 | .connect_to(second_step, 3); 94 | 95 | let mut fanout_builder = builder.fanout_broadcast(2); 96 | let (_, first_output_receiver) = fanout_builder 97 | .get_processor_builder() 98 | .unwrap() 99 | .connect_to(RunnableAsyncStep::new(PassThroughStep::default()), 1) 100 | .end_and_return_output_receiver(1); 101 | 102 | let (second_builder, second_output_receiver) = fanout_builder 103 | .get_processor_builder() 104 | .unwrap() 105 | .connect_to( 106 | RunnableAsyncStep::new(PassThroughStep::new_named("MaxStep".to_string())), 107 | 2, 108 | ) 109 | .connect_to(RunnableAsyncStep::new(PassThroughStep::default()), 5) 110 | .end_and_return_output_receiver(5); 111 | 112 | let mut output_receivers = [first_output_receiver, second_output_receiver]; 113 | 114 | output_receivers.iter().for_each(|output_receiver| { 115 | 
assert_eq!(output_receiver.len(), 0, "Output should be empty"); 116 | }); 117 | 118 | let left_input = TransactionContext { 119 | data: vec![1, 2, 3], 120 | metadata: TransactionMetadata { 121 | start_version: 0, 122 | end_version: 1, 123 | start_transaction_timestamp: None, 124 | end_transaction_timestamp: None, 125 | total_size_in_bytes: 0, 126 | }, 127 | }; 128 | input_sender.send(left_input.clone()).await.unwrap(); 129 | tokio::time::sleep(Duration::from_millis(250)).await; 130 | 131 | output_receivers.iter().for_each(|output_receiver| { 132 | assert_eq!(output_receiver.len(), 1, "Output should have 1 item"); 133 | }); 134 | 135 | for output_receiver in output_receivers.iter_mut() { 136 | let result = receive_with_timeout(output_receiver, 100).await.unwrap(); 137 | 138 | assert_eq!( 139 | result.data, 140 | make_test_structs(3), 141 | "Output should be the same as input" 142 | ); 143 | } 144 | 145 | let graph = second_builder.graph; 146 | let dot = graph.dot(); 147 | println!("{dot:}"); 148 | //first_handle.abort(); 149 | //second_handle.abort(); 150 | } 151 | 152 | #[tokio::test(flavor = "multi_thread", worker_threads = 2)] 153 | #[allow(clippy::needless_return)] 154 | async fn test_fanin() { 155 | let (input_sender, input_receiver) = instrumented_bounded_channel("input", 1); 156 | 157 | let input_step = RunnableStepWithInputReceiver::new( 158 | input_receiver, 159 | RunnableAsyncStep::new(PassThroughStep::default()), 160 | ); 161 | 162 | let mut fanout_builder = 163 | ProcessorBuilder::new_with_runnable_input_receiver_first_step(input_step) 164 | .fanout_broadcast(2); 165 | 166 | let (first_builder, first_output_receiver) = fanout_builder 167 | .get_processor_builder() 168 | .unwrap() 169 | .connect_to( 170 | RunnableAsyncStep::new(PassThroughStep::new_named("FanoutStep1".to_string())), 171 | 5, 172 | ) 173 | .end_and_return_output_receiver(5); 174 | 175 | let (second_builder, second_output_receiver) = fanout_builder 176 | .get_processor_builder() 177 | .unwrap() 178 | .connect_to( 179 | RunnableAsyncStep::new(PassThroughStep::new_named("FanoutStep2".to_string())), 180 | 5, 181 | ) 182 | .end_and_return_output_receiver(5); 183 | 184 | let test_step = TestStep; 185 | let test_step = RunnableAsyncStep::new(test_step); 186 | 187 | let (_, mut fanin_output_receiver) = ProcessorBuilder::new_with_fanin_step_with_receivers( 188 | vec![ 189 | (first_output_receiver, first_builder.graph), 190 | (second_output_receiver, second_builder.graph), 191 | ], 192 | RunnableAsyncStep::new(PassThroughStep::new_named("FaninStep".to_string())), 193 | 3, 194 | ) 195 | .connect_to(test_step, 10) 196 | .end_and_return_output_receiver(6); 197 | 198 | assert_eq!(fanin_output_receiver.len(), 0, "Output should be empty"); 199 | 200 | let left_input = TransactionContext { 201 | data: vec![1, 2, 3], 202 | metadata: TransactionMetadata { 203 | start_version: 0, 204 | end_version: 1, 205 | start_transaction_timestamp: None, 206 | end_transaction_timestamp: None, 207 | total_size_in_bytes: 0, 208 | }, 209 | }; 210 | input_sender.send(left_input.clone()).await.unwrap(); 211 | tokio::time::sleep(Duration::from_millis(250)).await; 212 | 213 | assert_eq!(fanin_output_receiver.len(), 2, "Output should have 2 items"); 214 | 215 | for _ in 0..2 { 216 | let result = receive_with_timeout(&mut fanin_output_receiver, 100) 217 | .await 218 | .unwrap(); 219 | 220 | assert_eq!( 221 | result.data, 222 | make_test_structs(3), 223 | "Output should be the same as input" 224 | ); 225 | } 226 | 227 | let graph = fanout_builder.graph; 
228 | let dot = graph.dot(); 229 | println!("{dot:}"); 230 | //first_handle.abort(); 231 | //second_handle.abort(); 232 | } 233 | } 234 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/README.md: -------------------------------------------------------------------------------- 1 | # Postgres crate 2 | 3 | ## About 4 | This crate provides a Postgres implementation for the integration layer between the Indexer SDK and Postgres. Features included are tracking the last processed version, retrieving the start version, and validating the chain id. The key components of this crate are core schema and models, Diesel utility functions, and trait implementations. 5 | 6 | ## How to use 7 | 1. Install Postgres and Diesel CLI 8 | 2. Add the `aptos-indexer-processor-sdk` crate with the `postgres_full` feature in the `[dependencies]` section of your `Cargo.toml`: 9 | ``` 10 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}", features = ["postgres_full"] } 11 | ``` 12 | 3. Copy the `src/db` folder into where you are managing your Diesel migrations. -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/README.md: -------------------------------------------------------------------------------- 1 | # Custom processor function 2 | 3 | Utility function that lets you create a Postgres processor. It works by running the code in the `run_processor` method and applying a `process_function` to each transaction. 4 | 5 | ## How to use 6 | 1. Install Postgres and Diesel CLI 7 | 2. Add the `aptos-indexer-processor-sdk` crate with the `postgres_full` feature in the `[dependencies]` section of your `Cargo.toml`: 8 | ``` 9 | aptos-indexer-processor-sdk = { git = "https://github.com/aptos-labs/aptos-indexer-processor-sdk.git", rev = "{COMMIT_HASH}", features = ["postgres_full"] } 10 | ``` 11 | 3. Set up Diesel and define your DB migrations. 12 | 4. In `main.rs`, call the `process` function with your indexing logic. You'll need to implement this part: 13 | ``` 14 | const MIGRATIONS: EmbeddedMigrations = embed_migrations!("/path/to/src/db/migrations"); 15 | process( 16 | "processor_name".to_string(), 17 | MIGRATIONS, 18 | async |transactions, conn_pool| { 19 | // Implement your indexing logic 20 | }, 21 | ) 22 | .await?; 23 | ``` 24 | The `process` function is an abstraction around a regular SDK processor. 25 | 26 | It runs your db migrations, validates the chain id, connects to Transaction Stream, tracks the last successful version, and processes transactions using your custom indexing logic. 27 | 28 | See [`postgres-basic-events-example`](https://github.com/aptos-labs/aptos-indexer-processor-sdk/tree/main/examples/postgres-basic-events-example) for an example on how to use this function to create a simple processor that writes events to Postgres. 29 | 30 | 5. Construct a `config.yaml` file using this example: 31 | ``` 32 | # This is a template yaml for the processor 33 | health_check_port: 8085 34 | server_config: 35 | transaction_stream_config: 36 | indexer_grpc_data_service_address: "https://grpc.mainnet.aptoslabs.com:443" 37 | auth_token: "AUTH_TOKEN" 38 | request_name_header: "PROCESSOR_NAME" 39 | starting_version: 0 40 | postgres_config: 41 | connection_string: postgresql://postgres:@localhost:5432/example 42 | ``` 43 | 6.
Run processor using this command `cargo run -p postgres-basic-events-example -- -c /path/to/config.yaml` 44 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/basic_processor_function.rs: -------------------------------------------------------------------------------- 1 | use super::basic_processor_step::BasicProcessorStep; 2 | use crate::{ 3 | aptos_indexer_transaction_stream::TransactionStreamConfig, 4 | builder::ProcessorBuilder, 5 | common_steps::{ 6 | TransactionStreamStep, VersionTrackerStep, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS, 7 | }, 8 | postgres::{ 9 | subconfigs::postgres_config::PostgresConfig, 10 | utils::{ 11 | checkpoint::{ 12 | get_starting_version, PostgresChainIdChecker, PostgresProcessorStatusSaver, 13 | }, 14 | database::{new_db_pool, run_migrations, ArcDbPool}, 15 | }, 16 | SDK_MIGRATIONS, 17 | }, 18 | server_framework::{ 19 | load, register_probes_and_metrics_handler, setup_logging, setup_panic_handler, 20 | GenericConfig, ServerArgs, 21 | }, 22 | traits::IntoRunnableStep, 23 | utils::{chain_id_check::check_or_update_chain_id, errors::ProcessorError}, 24 | }; 25 | use anyhow::Result; 26 | use aptos_protos::transaction::v1::Transaction; 27 | use clap::Parser; 28 | use diesel_migrations::EmbeddedMigrations; 29 | use serde::{Deserialize, Serialize}; 30 | use tracing::info; 31 | 32 | #[derive(Clone, Debug, Deserialize, Serialize)] 33 | #[serde(deny_unknown_fields)] 34 | pub struct ProcessConfig { 35 | pub transaction_stream_config: TransactionStreamConfig, 36 | pub postgres_config: PostgresConfig, 37 | } 38 | 39 | /// Processes transactions with a custom handler function. 40 | pub async fn process( 41 | processor_name: String, 42 | embedded_migrations: EmbeddedMigrations, 43 | process_function: F, 44 | ) -> Result<()> 45 | where 46 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + Sync + 'static, 47 | Fut: std::future::Future> + Send + 'static, 48 | { 49 | let args = ServerArgs::parse(); 50 | setup_logging(); 51 | setup_panic_handler(); 52 | let config = load::>(&args.config_path)?; 53 | let handle = tokio::runtime::Handle::current(); 54 | 55 | let health_port = config.health_check_port; 56 | let additional_labels = config.metrics_config.additional_labels.clone(); 57 | // Start liveness and readiness probes. 58 | let task_handler = handle.spawn(async move { 59 | register_probes_and_metrics_handler(health_port, additional_labels).await; 60 | anyhow::Ok(()) 61 | }); 62 | let main_task_handler = handle.spawn(async move { 63 | run_processor( 64 | processor_name, 65 | config.server_config.transaction_stream_config, 66 | config.server_config.postgres_config, 67 | embedded_migrations, 68 | process_function, 69 | ) 70 | .await 71 | }); 72 | tokio::select! 
{ 73 | res = task_handler => { 74 | res.expect("Probes and metrics handler unexpectedly exited") 75 | }, 76 | res = main_task_handler => { 77 | res.expect("Main task handler unexpectedly exited") 78 | }, 79 | } 80 | } 81 | 82 | async fn run_processor( 83 | processor_name: String, 84 | transaction_stream_config: TransactionStreamConfig, 85 | postgres_config: PostgresConfig, 86 | embedded_migrations: EmbeddedMigrations, 87 | process_function: F, 88 | ) -> Result<()> 89 | where 90 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + Sync + 'static, 91 | Fut: std::future::Future> + Send + 'static, 92 | { 93 | // Create a connection pool 94 | let db_pool = new_db_pool( 95 | &postgres_config.connection_string, 96 | Some(postgres_config.db_pool_size), 97 | ) 98 | .await 99 | .expect("Failed to create connection pool"); 100 | 101 | // Run user migrations 102 | run_migrations( 103 | postgres_config.connection_string.clone(), 104 | db_pool.clone(), 105 | embedded_migrations, 106 | ) 107 | .await; 108 | 109 | // Run SDK migrations 110 | run_migrations( 111 | postgres_config.connection_string.clone(), 112 | db_pool.clone(), 113 | SDK_MIGRATIONS, 114 | ) 115 | .await; 116 | 117 | check_or_update_chain_id( 118 | &transaction_stream_config, 119 | &PostgresChainIdChecker::new(db_pool.clone()), 120 | ) 121 | .await?; 122 | 123 | // Merge the starting version from config and the latest processed version from the DB 124 | let starting_version = get_starting_version( 125 | processor_name.as_str(), 126 | transaction_stream_config.clone(), 127 | db_pool.clone(), 128 | ) 129 | .await?; 130 | 131 | // Define processor steps 132 | let transaction_stream_config = transaction_stream_config.clone(); 133 | let transaction_stream = TransactionStreamStep::new(TransactionStreamConfig { 134 | starting_version: Some(starting_version), 135 | ..transaction_stream_config 136 | }) 137 | .await?; 138 | let basic_processor_step = BasicProcessorStep { 139 | process_function, 140 | conn_pool: db_pool.clone(), 141 | }; 142 | let processor_status_saver = 143 | PostgresProcessorStatusSaver::new(processor_name.as_str(), db_pool.clone()); 144 | let version_tracker = 145 | VersionTrackerStep::new(processor_status_saver, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS); 146 | 147 | // Connect processor steps together 148 | let (_, buffer_receiver) = 149 | ProcessorBuilder::new_with_inputless_first_step(transaction_stream.into_runnable_step()) 150 | .connect_to(basic_processor_step.into_runnable_step(), 10) 151 | .connect_to(version_tracker.into_runnable_step(), 10) 152 | .end_and_return_output_receiver(10); 153 | 154 | // (Optional) Parse the results 155 | loop { 156 | match buffer_receiver.recv().await { 157 | Ok(_) => {}, 158 | Err(_) => { 159 | info!("Channel is closed"); 160 | return Ok(()); 161 | }, 162 | } 163 | } 164 | } 165 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/basic_processor_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | postgres::utils::database::ArcDbPool, 3 | traits::{AsyncRunType, AsyncStep, NamedStep, Processable}, 4 | types::transaction_context::TransactionContext, 5 | utils::errors::ProcessorError, 6 | }; 7 | use anyhow::Result; 8 | use aptos_protos::transaction::v1::Transaction; 9 | use async_trait::async_trait; 10 | 11 | // Basic process step that runs a process function on each transaction 12 | pub struct BasicProcessorStep 13 | where 14 | F: FnMut(Vec, ArcDbPool) -> 
Fut + Send + 'static, 15 | Fut: std::future::Future> + Send + 'static, 16 | { 17 | pub process_function: F, 18 | pub conn_pool: ArcDbPool, 19 | } 20 | 21 | #[async_trait] 22 | impl Processable for BasicProcessorStep 23 | where 24 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 25 | Fut: std::future::Future> + Send + 'static, 26 | { 27 | type Input = Vec; 28 | type Output = (); 29 | type RunType = AsyncRunType; 30 | 31 | async fn process( 32 | &mut self, 33 | transactions: TransactionContext>, 34 | ) -> Result>, ProcessorError> { 35 | (self.process_function)(transactions.data, self.conn_pool.clone()) 36 | .await 37 | .map_err(|e| ProcessorError::ProcessError { 38 | message: format!("Processing transactions failed: {e:?}"), 39 | })?; 40 | Ok(Some(TransactionContext { 41 | data: (), // Stub out data since it's not used in the next step 42 | metadata: transactions.metadata, 43 | })) 44 | } 45 | } 46 | 47 | impl AsyncStep for BasicProcessorStep 48 | where 49 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 50 | Fut: std::future::Future> + Send + 'static, 51 | { 52 | } 53 | 54 | impl NamedStep for BasicProcessorStep 55 | where 56 | F: FnMut(Vec, ArcDbPool) -> Fut + Send + 'static, 57 | Fut: std::future::Future> + Send + 'static, 58 | { 59 | fn name(&self) -> String { 60 | "BasicProcessorStep".to_string() 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/basic_processor/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod basic_processor_function; 2 | pub mod basic_processor_step; 3 | 4 | pub use basic_processor_function::process; 5 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/diesel.toml: -------------------------------------------------------------------------------- 1 | # For documentation on how to configure this file, 2 | # see https://diesel.rs/guides/configuring-diesel-cli 3 | 4 | [print_schema] 5 | file = "processor_metadata_schema.rs" 6 | schema = "processor_metadata" 7 | 8 | [migrations_directory] 9 | dir = "migrations" 10 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/00000000000000_diesel_initial_setup/down.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 4 | 5 | DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); 6 | DROP FUNCTION IF EXISTS diesel_set_updated_at(); 7 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/00000000000000_diesel_initial_setup/up.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations.
4 | 5 | 6 | 7 | 8 | -- Sets up a trigger for the given table to automatically set a column called 9 | -- `updated_at` whenever the row is modified (unless `updated_at` was included 10 | -- in the modified columns) 11 | -- 12 | -- # Example 13 | -- 14 | -- ```sql 15 | -- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); 16 | -- 17 | -- SELECT diesel_manage_updated_at('users'); 18 | -- ``` 19 | CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ 20 | BEGIN 21 | EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s 22 | FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); 23 | END; 24 | $$ LANGUAGE plpgsql; 25 | 26 | CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ 27 | BEGIN 28 | IF ( 29 | NEW IS DISTINCT FROM OLD AND 30 | NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at 31 | ) THEN 32 | NEW.updated_at := current_timestamp; 33 | END IF; 34 | RETURN NEW; 35 | END; 36 | $$ LANGUAGE plpgsql; 37 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/2025-03-06-201942_create_core_schema/down.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS processor_metadata.processor_status; 2 | DROP TABLE IF EXISTS processor_metadata.ledger_infos; 3 | DROP SCHEMA IF EXISTS processor_metadata; -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/migrations/2025-03-06-201942_create_core_schema/up.sql: -------------------------------------------------------------------------------- 1 | CREATE SCHEMA IF NOT EXISTS processor_metadata; 2 | 3 | -- Tracks latest processed version per processor 4 | CREATE TABLE IF NOT EXISTS processor_metadata.processor_status ( 5 | processor VARCHAR(100) UNIQUE PRIMARY KEY NOT NULL, 6 | last_success_version BIGINT NOT NULL, 7 | last_updated TIMESTAMP NOT NULL DEFAULT NOW(), 8 | last_transaction_timestamp TIMESTAMP NULL 9 | ); 10 | 11 | -- Tracks chain id 12 | CREATE TABLE IF NOT EXISTS processor_metadata.ledger_infos (chain_id BIGINT UNIQUE PRIMARY KEY NOT NULL); 13 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/db/processor_metadata_schema.rs: -------------------------------------------------------------------------------- 1 | // @generated automatically by Diesel CLI. 2 | 3 | pub mod processor_metadata { 4 | diesel::table! { 5 | processor_metadata.ledger_infos (chain_id) { 6 | chain_id -> Int8, 7 | } 8 | } 9 | 10 | diesel::table!
{ 11 | processor_metadata.processor_status (processor) { 12 | #[max_length = 100] 13 | processor -> Varchar, 14 | last_success_version -> Int8, 15 | last_updated -> Timestamp, 16 | last_transaction_timestamp -> Nullable, 17 | } 18 | } 19 | 20 | diesel::allow_tables_to_appear_in_same_query!(ledger_infos, processor_status,); 21 | } 22 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/mod.rs: -------------------------------------------------------------------------------- 1 | use diesel_migrations::{embed_migrations, EmbeddedMigrations}; 2 | 3 | pub mod basic_processor; 4 | pub mod models; 5 | pub mod subconfigs; 6 | pub mod utils; 7 | 8 | #[path = "db/processor_metadata_schema.rs"] 9 | pub mod processor_metadata_schema; 10 | 11 | pub const SDK_MIGRATIONS: EmbeddedMigrations = embed_migrations!("./src/postgres/db/migrations"); 12 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/ledger_info.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::postgres::{ 7 | processor_metadata_schema::processor_metadata::ledger_infos, utils::database::DbPoolConnection, 8 | }; 9 | use diesel::{Identifiable, Insertable, OptionalExtension, QueryDsl, Queryable}; 10 | use diesel_async::RunQueryDsl; 11 | 12 | #[derive(Debug, Identifiable, Insertable, Queryable)] 13 | #[diesel(table_name = ledger_infos)] 14 | #[diesel(primary_key(chain_id))] 15 | pub struct LedgerInfo { 16 | pub chain_id: i64, 17 | } 18 | 19 | impl LedgerInfo { 20 | pub async fn get(conn: &mut DbPoolConnection<'_>) -> diesel::QueryResult> { 21 | ledger_infos::table 22 | .select(ledger_infos::all_columns) 23 | .first::(conn) 24 | .await 25 | .optional() 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod ledger_info; 2 | pub mod processor_status; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/models/processor_status.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::postgres::{ 7 | processor_metadata_schema::processor_metadata::processor_status, 8 | utils::database::DbPoolConnection, 9 | }; 10 | use diesel::{AsChangeset, ExpressionMethods, Insertable, OptionalExtension, QueryDsl, Queryable}; 11 | use diesel_async::RunQueryDsl; 12 | 13 | #[derive(AsChangeset, Debug, Insertable)] 14 | #[diesel(table_name = processor_status)] 15 | /// Only tracking the latest version successfully processed 16 | pub struct ProcessorStatus { 17 | pub processor: String, 18 | pub last_success_version: i64, 19 | pub last_transaction_timestamp: Option, 20 | } 21 | 22 | #[derive(AsChangeset, Debug, Queryable)] 23 | #[diesel(table_name = processor_status)] 24 | /// Only tracking the latest version successfully processed 25 | pub struct ProcessorStatusQuery { 26 | pub processor: String, 27 | pub last_success_version: i64, 28 | pub last_updated: chrono::NaiveDateTime, 29 | pub 
last_transaction_timestamp: Option, 30 | } 31 | 32 | impl ProcessorStatusQuery { 33 | pub async fn get_by_processor( 34 | processor_name: &str, 35 | conn: &mut DbPoolConnection<'_>, 36 | ) -> diesel::QueryResult> { 37 | processor_status::table 38 | .filter(processor_status::processor.eq(processor_name)) 39 | .first::(conn) 40 | .await 41 | .optional() 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/subconfigs/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod postgres_config; 2 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/subconfigs/postgres_config.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | 3 | #[derive(Clone, Debug, Deserialize, Serialize)] 4 | #[serde(deny_unknown_fields)] 5 | pub struct PostgresConfig { 6 | pub connection_string: String, 7 | // Size of the pool for writes/reads to the DB. Limits maximum number of queries in flight 8 | #[serde(default = "PostgresConfig::default_db_pool_size")] 9 | pub db_pool_size: u32, 10 | } 11 | 12 | impl PostgresConfig { 13 | pub const fn default_db_pool_size() -> u32 { 14 | 150 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/checkpoint.rs: -------------------------------------------------------------------------------- 1 | use super::database::{execute_with_better_error, execute_with_better_error_conn, ArcDbPool}; 2 | use crate::{ 3 | aptos_indexer_transaction_stream::{utils::time::parse_timestamp, TransactionStreamConfig}, 4 | common_steps::ProcessorStatusSaver, 5 | postgres::{ 6 | models::{ 7 | ledger_info::LedgerInfo, 8 | processor_status::{ProcessorStatus, ProcessorStatusQuery}, 9 | }, 10 | processor_metadata_schema::processor_metadata::{ledger_infos, processor_status}, 11 | }, 12 | types::transaction_context::TransactionContext, 13 | utils::{chain_id_check::ChainIdChecker, errors::ProcessorError}, 14 | }; 15 | use anyhow::{Context, Result}; 16 | use async_trait::async_trait; 17 | use diesel::{query_dsl::methods::FilterDsl, upsert::excluded, ExpressionMethods}; 18 | 19 | /// A trait implementation of ChainIdChecker for Postgres. 20 | pub struct PostgresChainIdChecker { 21 | pub db_pool: ArcDbPool, 22 | } 23 | 24 | impl PostgresChainIdChecker { 25 | pub fn new(db_pool: ArcDbPool) -> Self { 26 | Self { db_pool } 27 | } 28 | } 29 | 30 | #[async_trait] 31 | impl ChainIdChecker for PostgresChainIdChecker { 32 | async fn save_chain_id(&self, chain_id: u64) -> Result<()> { 33 | let mut conn = self 34 | .db_pool 35 | .get() 36 | .await 37 | .context("Error getting db connection")?; 38 | execute_with_better_error_conn( 39 | &mut conn, 40 | diesel::insert_into(ledger_infos::table) 41 | .values(LedgerInfo { 42 | chain_id: chain_id as i64, 43 | }) 44 | .on_conflict_do_nothing(), 45 | ) 46 | .await 47 | .context("Error updating chain_id!")?; 48 | Ok(()) 49 | } 50 | 51 | async fn get_chain_id(&self) -> Result> { 52 | let mut conn = self.db_pool.get().await?; 53 | let maybe_existing_chain_id = LedgerInfo::get(&mut conn) 54 | .await? 55 | .map(|li| li.chain_id as u64); 56 | Ok(maybe_existing_chain_id) 57 | } 58 | } 59 | 60 | /// A trait implementation of ProcessorStatusSaver for Postgres. 
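/// /// Illustrative usage (mirroring `basic_processor_function.rs` in this crate; the processor name and pool here are placeholders): construct it with the processor name and DB pool, then hand it to `VersionTrackerStep` so the latest processed version is checkpointed to `processor_metadata.processor_status`: /// ```ignore /// let saver = PostgresProcessorStatusSaver::new("my_processor", db_pool.clone()); /// let version_tracker = VersionTrackerStep::new(saver, DEFAULT_UPDATE_PROCESSOR_STATUS_SECS); /// ```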
61 | pub struct PostgresProcessorStatusSaver { 62 | pub db_pool: ArcDbPool, 63 | pub processor_name: String, 64 | } 65 | 66 | impl PostgresProcessorStatusSaver { 67 | pub fn new(processor_name: &str, db_pool: ArcDbPool) -> Self { 68 | Self { 69 | db_pool, 70 | processor_name: processor_name.to_string(), 71 | } 72 | } 73 | } 74 | 75 | #[async_trait] 76 | impl ProcessorStatusSaver for PostgresProcessorStatusSaver { 77 | async fn save_processor_status( 78 | &self, 79 | last_success_batch: &TransactionContext<()>, 80 | ) -> Result<(), ProcessorError> { 81 | let last_success_version = last_success_batch.metadata.end_version as i64; 82 | let last_transaction_timestamp = last_success_batch 83 | .metadata 84 | .end_transaction_timestamp 85 | .as_ref() 86 | .map(|t| parse_timestamp(t, last_success_batch.metadata.end_version as i64)) 87 | .map(|t| t.naive_utc()); 88 | let status = ProcessorStatus { 89 | processor: self.processor_name.clone(), 90 | last_success_version, 91 | last_transaction_timestamp, 92 | }; 93 | 94 | // Save regular processor status to the database 95 | execute_with_better_error( 96 | self.db_pool.clone(), 97 | diesel::insert_into(processor_status::table) 98 | .values(&status) 99 | .on_conflict(processor_status::processor) 100 | .do_update() 101 | .set(( 102 | processor_status::last_success_version 103 | .eq(excluded(processor_status::last_success_version)), 104 | processor_status::last_updated.eq(excluded(processor_status::last_updated)), 105 | processor_status::last_transaction_timestamp 106 | .eq(excluded(processor_status::last_transaction_timestamp)), 107 | )) 108 | .filter( 109 | processor_status::last_success_version 110 | .le(excluded(processor_status::last_success_version)), 111 | ), 112 | ) 113 | .await?; 114 | Ok(()) 115 | } 116 | } 117 | 118 | pub async fn get_starting_version( 119 | processor_name: &str, 120 | transaction_stream_config: TransactionStreamConfig, 121 | conn_pool: ArcDbPool, 122 | ) -> Result { 123 | let mut conn = conn_pool.get().await?; 124 | let latest_processed_version = 125 | ProcessorStatusQuery::get_by_processor(processor_name, &mut conn) 126 | .await? 127 | .map(|ps| ps.last_success_version as u64); 128 | // If nothing checkpointed, return the `starting_version` from the config, or 0 if not set. 129 | Ok(latest_processed_version.unwrap_or(transaction_stream_config.starting_version.unwrap_or(0))) 130 | } 131 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/database.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! 
Database-related functions 5 | #![allow(clippy::extra_unused_lifetimes)] 6 | 7 | use crate::utils::{convert::remove_null_bytes, errors::ProcessorError}; 8 | use ahash::AHashMap; 9 | use diesel::{query_builder::QueryFragment, ConnectionResult, QueryResult}; 10 | use diesel_async::{ 11 | pooled_connection::{ 12 | bb8::{Pool, PooledConnection}, 13 | AsyncDieselConnectionManager, ManagerConfig, PoolError, 14 | }, 15 | AsyncPgConnection, RunQueryDsl, 16 | }; 17 | use diesel_migrations::{EmbeddedMigrations, MigrationHarness}; 18 | use futures_util::{future::BoxFuture, FutureExt}; 19 | use std::sync::Arc; 20 | use tracing::{info, warn}; 21 | 22 | pub type Backend = diesel::pg::Pg; 23 | 24 | pub type MyDbConnection = AsyncPgConnection; 25 | pub type DbPool = Pool; 26 | pub type ArcDbPool = Arc; 27 | pub type DbPoolConnection<'a> = PooledConnection<'a, MyDbConnection>; 28 | 29 | pub const DEFAULT_MAX_POOL_SIZE: u32 = 150; 30 | 31 | // the max is actually u16::MAX but we see that when the size is too big we get an overflow error so reducing it a bit 32 | pub const MAX_DIESEL_PARAM_SIZE: usize = (u16::MAX / 2) as usize; 33 | 34 | /// This function will clean the data for postgres. Currently it has support for removing 35 | /// null bytes from strings but in the future we will add more functionality. 36 | pub fn clean_data_for_db serde::Deserialize<'de>>( 37 | items: Vec, 38 | should_remove_null_bytes: bool, 39 | ) -> Vec { 40 | if should_remove_null_bytes { 41 | items.iter().map(remove_null_bytes).collect() 42 | } else { 43 | items 44 | } 45 | } 46 | 47 | fn establish_connection(database_url: &str) -> BoxFuture> { 48 | use native_tls::{Certificate, TlsConnector}; 49 | use postgres_native_tls::MakeTlsConnector; 50 | 51 | (async move { 52 | let (url, cert_path) = parse_and_clean_db_url(database_url); 53 | let cert = std::fs::read(cert_path.unwrap()).expect("Could not read certificate"); 54 | 55 | let cert = Certificate::from_pem(&cert).expect("Could not parse certificate"); 56 | let connector = TlsConnector::builder() 57 | .danger_accept_invalid_certs(true) 58 | .add_root_certificate(cert) 59 | .build() 60 | .expect("Could not build TLS connector"); 61 | let connector = MakeTlsConnector::new(connector); 62 | 63 | let (client, connection) = tokio_postgres::connect(&url, connector) 64 | .await 65 | .expect("Could not connect to database"); 66 | tokio::spawn(async move { 67 | if let Err(e) = connection.await { 68 | eprintln!("connection error: {e}"); 69 | } 70 | }); 71 | AsyncPgConnection::try_from(client).await 72 | }) 73 | .boxed() 74 | } 75 | 76 | fn parse_and_clean_db_url(url: &str) -> (String, Option) { 77 | let mut db_url = url::Url::parse(url).expect("Could not parse database url"); 78 | let mut cert_path = None; 79 | 80 | let mut query = "".to_string(); 81 | db_url.query_pairs().for_each(|(k, v)| { 82 | if k == "sslrootcert" { 83 | cert_path = Some(v.parse().unwrap()); 84 | } else { 85 | query.push_str(&format!("{k}={v}&")); 86 | } 87 | }); 88 | db_url.set_query(Some(&query)); 89 | 90 | (db_url.to_string(), cert_path) 91 | } 92 | 93 | pub async fn new_db_pool( 94 | database_url: &str, 95 | max_pool_size: Option, 96 | ) -> Result { 97 | let (_url, cert_path) = parse_and_clean_db_url(database_url); 98 | 99 | let config = if cert_path.is_some() { 100 | let mut config = ManagerConfig::::default(); 101 | config.custom_setup = Box::new(|conn| Box::pin(establish_connection(conn))); 102 | AsyncDieselConnectionManager::::new_with_config(database_url, config) 103 | } else { 104 | 
AsyncDieselConnectionManager::::new(database_url) 105 | }; 106 | let pool = Pool::builder() 107 | .max_size(max_pool_size.unwrap_or(DEFAULT_MAX_POOL_SIZE)) 108 | .build(config) 109 | .await?; 110 | Ok(Arc::new(pool)) 111 | } 112 | 113 | pub async fn execute_in_chunks( 114 | conn: ArcDbPool, 115 | build_query: fn(Vec) -> U, 116 | items_to_insert: &[T], 117 | chunk_size: usize, 118 | ) -> Result<(), ProcessorError> 119 | where 120 | U: QueryFragment + diesel::query_builder::QueryId + Send + 'static, 121 | T: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone + Send + 'static, 122 | { 123 | let tasks = items_to_insert 124 | .chunks(chunk_size) 125 | .map(|chunk| { 126 | let conn = conn.clone(); 127 | let items = chunk.to_vec(); 128 | tokio::spawn(async move { 129 | let query = build_query(items.clone()); 130 | execute_or_retry_cleaned(conn, build_query, items, query).await 131 | }) 132 | }) 133 | .collect::>(); 134 | 135 | let results = futures_util::future::try_join_all(tasks) 136 | .await 137 | .expect("Task panicked executing in chunks"); 138 | for res in results { 139 | res? 140 | } 141 | 142 | Ok(()) 143 | } 144 | 145 | /// Returns the entry for the config hashmap, or the default field count for the insert. 146 | /// 147 | /// Given diesel has a limit of how many parameters can be inserted in a single operation (u16::MAX), 148 | /// we default to chunk an array of items based on how many columns are in the table. 149 | pub fn get_config_table_chunk_size( 150 | table_name: &str, 151 | per_table_chunk_sizes: &AHashMap, 152 | ) -> usize { 153 | let chunk_size = per_table_chunk_sizes.get(table_name).copied(); 154 | chunk_size.unwrap_or_else(|| MAX_DIESEL_PARAM_SIZE / T::field_count()) 155 | } 156 | 157 | pub async fn execute_with_better_error( 158 | pool: ArcDbPool, 159 | query: U, 160 | ) -> Result 161 | where 162 | U: QueryFragment + diesel::query_builder::QueryId + Send, 163 | { 164 | let debug_string = diesel::debug_query::(&query).to_string(); 165 | let conn = &mut pool.get().await.map_err(|e| { 166 | warn!("Error getting connection from pool: {:?}", e); 167 | ProcessorError::DBStoreError { 168 | message: format!("{e:#}"), 169 | query: Some(debug_string.clone()), 170 | } 171 | })?; 172 | query 173 | .execute(conn) 174 | .await 175 | .inspect_err(|e| { 176 | warn!("Error running query: {:?}\n{:?}", e, debug_string); 177 | }) 178 | .map_err(|e| ProcessorError::DBStoreError { 179 | message: format!("{e:#}"), 180 | query: Some(debug_string), 181 | }) 182 | } 183 | 184 | pub async fn execute_with_better_error_conn( 185 | conn: &mut MyDbConnection, 186 | query: U, 187 | ) -> QueryResult 188 | where 189 | U: QueryFragment + diesel::query_builder::QueryId + Send, 190 | { 191 | let debug_string = diesel::debug_query::(&query).to_string(); 192 | tracing::debug!("Executing query: {:?}", debug_string); 193 | let res = query.execute(conn).await; 194 | if let Err(ref e) = res { 195 | tracing::warn!("Error running query: {:?}\n{:?}", e, debug_string); 196 | } 197 | res 198 | } 199 | 200 | async fn execute_or_retry_cleaned( 201 | conn: ArcDbPool, 202 | build_query: fn(Vec) -> U, 203 | items: Vec, 204 | query: U, 205 | ) -> Result<(), ProcessorError> 206 | where 207 | U: QueryFragment + diesel::query_builder::QueryId + Send, 208 | T: serde::Serialize + for<'de> serde::Deserialize<'de> + Clone, 209 | { 210 | match execute_with_better_error(conn.clone(), query).await { 211 | Ok(_) => {}, 212 | Err(_) => { 213 | let cleaned_items = clean_data_for_db(items, true); 214 | let cleaned_query = 
build_query(cleaned_items); 215 | match execute_with_better_error(conn.clone(), cleaned_query).await { 216 | Ok(_) => {}, 217 | Err(e) => { 218 | return Err(e); 219 | }, 220 | } 221 | }, 222 | } 223 | Ok(()) 224 | } 225 | 226 | pub fn run_pending_migrations( 227 | conn: &mut impl MigrationHarness, 228 | migrations: EmbeddedMigrations, 229 | ) { 230 | conn.run_pending_migrations(migrations) 231 | .expect("[Parser] Migrations failed!"); 232 | } 233 | 234 | // For the normal processor build we just use standard Diesel with the postgres 235 | // feature enabled (which uses libpq under the hood, hence why we named the feature 236 | // this way). 237 | #[cfg(feature = "postgres_full")] 238 | pub async fn run_migrations( 239 | postgres_connection_string: String, 240 | _conn_pool: ArcDbPool, 241 | migrations: EmbeddedMigrations, 242 | ) { 243 | use diesel::{Connection, PgConnection}; 244 | 245 | info!("Running migrations: {:?}", postgres_connection_string); 246 | let migration_time = std::time::Instant::now(); 247 | let mut conn = 248 | PgConnection::establish(&postgres_connection_string).expect("migrations failed!"); 249 | run_pending_migrations(&mut conn, migrations); 250 | info!( 251 | duration_in_secs = migration_time.elapsed().as_secs_f64(), 252 | "[Parser] Finished migrations" 253 | ); 254 | } 255 | 256 | // If the postgres_full feature isn't enabled, we use diesel async instead. This is used by 257 | // the CLI for the local testnet, where we cannot tolerate the libpq dependency. 258 | #[cfg(not(feature = "postgres_full"))] 259 | pub async fn run_migrations( 260 | postgres_connection_string: String, 261 | conn_pool: ArcDbPool, 262 | migrations: EmbeddedMigrations, 263 | ) { 264 | use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; 265 | 266 | info!("Running migrations: {:?}", postgres_connection_string); 267 | let conn = conn_pool 268 | // We need to use this since AsyncConnectionWrapper doesn't know how to 269 | // work with a pooled connection. 270 | .dedicated_connection() 271 | .await 272 | .expect("[Parser] Failed to get connection"); 273 | // We use spawn_blocking since run_pending_migrations is a blocking function. 274 | tokio::task::spawn_blocking(move || { 275 | // This lets us use the connection like a normal diesel connection. 
See more: 276 | // https://docs.rs/diesel-async/latest/diesel_async/async_connection_wrapper/type.AsyncConnectionWrapper.html 277 | let mut conn: AsyncConnectionWrapper = 278 | AsyncConnectionWrapper::from(conn); 279 | run_pending_migrations(&mut conn, migrations); 280 | }) 281 | .await 282 | .expect("[Parser] Failed to run migrations"); 283 | } 284 | 285 | pub struct DbContext<'a> { 286 | pub conn: DbPoolConnection<'a>, 287 | pub query_retries: u32, 288 | pub query_retry_delay_ms: u64, 289 | } 290 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/postgres/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod checkpoint; 2 | pub mod database; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/server_framework.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | 3 | use crate::{ 4 | instrumented_channel::channel_metrics::init_channel_metrics_registry, 5 | utils::step_metrics::init_step_metrics_registry, 6 | }; 7 | use anyhow::{Context, Result}; 8 | #[cfg(target_os = "linux")] 9 | use aptos_system_utils::profiling::start_cpu_profiling; 10 | use autometrics::settings::AutometricsSettings; 11 | use axum::{http::StatusCode, response::IntoResponse, routing::get, Router}; 12 | use backtrace::Backtrace; 13 | use clap::Parser; 14 | use prometheus_client::registry::Registry; 15 | use serde::{de::DeserializeOwned, Deserialize, Serialize}; 16 | // TODO: remove deprecated lint when new clippy nightly is released 17 | #[allow(deprecated)] 18 | use std::{fs::File, io::Read, panic::PanicInfo, path::PathBuf, process}; 19 | use tokio::runtime::Handle; 20 | use tracing::error; 21 | use tracing_subscriber::EnvFilter; 22 | 23 | /// ServerArgs bootstraps a server with all common pieces. And then triggers the run method for 24 | /// the specific service. 25 | #[derive(Parser)] 26 | pub struct ServerArgs { 27 | #[clap(short, long, value_parser)] 28 | pub config_path: PathBuf, 29 | } 30 | 31 | impl ServerArgs { 32 | pub async fn run(&self, handle: Handle) -> Result<()> 33 | where 34 | C: RunnableConfig, 35 | { 36 | // Set up the server. 37 | setup_logging(); 38 | setup_panic_handler(); 39 | let config = load::>(&self.config_path)?; 40 | run_server_with_config(config, handle).await 41 | } 42 | } 43 | 44 | /// Run a server and the necessary probes. For spawning these tasks, the user must 45 | /// provide a handle to a runtime they already have. 46 | pub async fn run_server_with_config(config: GenericConfig, handle: Handle) -> Result<()> 47 | where 48 | C: RunnableConfig, 49 | { 50 | let health_port = config.health_check_port; 51 | let additional_labels = config.metrics_config.additional_labels.clone(); 52 | // Start liveness and readiness probes. 53 | let task_handler = handle.spawn(async move { 54 | register_probes_and_metrics_handler(health_port, additional_labels).await; 55 | anyhow::Ok(()) 56 | }); 57 | let main_task_handler = handle.spawn(async move { config.run().await }); 58 | tokio::select! { 59 | res = task_handler => { 60 | res.expect("Probes and metrics handler unexpectedly exited") 61 | }, 62 | res = main_task_handler => { 63 | res.expect("Main task handler unexpectedly exited") 64 | }, 65 | } 66 | } 67 | 68 | #[derive(Deserialize, Debug, Serialize)] 69 | pub struct GenericConfig { 70 | // Shared configuration among all services. 
71 | pub health_check_port: u16, 72 | 73 | #[serde(default)] 74 | pub metrics_config: MetricsConfig, 75 | 76 | // Specific configuration for each service. 77 | pub server_config: T, 78 | } 79 | 80 | #[derive(Clone, Deserialize, Debug, Default, Serialize)] 81 | pub struct MetricsConfig { 82 | /// Additional labels to use for metrics. 83 | pub additional_labels: Vec<(String, String)>, 84 | } 85 | 86 | #[async_trait::async_trait] 87 | impl RunnableConfig for GenericConfig 88 | where 89 | T: RunnableConfig, 90 | { 91 | async fn run(&self) -> Result<()> { 92 | self.server_config.run().await 93 | } 94 | 95 | fn get_server_name(&self) -> String { 96 | self.server_config.get_server_name() 97 | } 98 | } 99 | 100 | /// RunnableConfig is a trait that all services must implement for their configuration. 101 | #[async_trait::async_trait] 102 | pub trait RunnableConfig: DeserializeOwned + Send + Sync + 'static { 103 | async fn run(&self) -> Result<()>; 104 | fn get_server_name(&self) -> String; 105 | } 106 | 107 | /// Parse a yaml file into a struct. 108 | pub fn load Deserialize<'de>>(path: &PathBuf) -> Result { 109 | let mut file = 110 | File::open(path).with_context(|| format!("failed to open the file at path: {path:?}",))?; 111 | let mut contents = String::new(); 112 | file.read_to_string(&mut contents) 113 | .with_context(|| format!("failed to read the file at path: {path:?}",))?; 114 | serde_yaml::from_str::(&contents).context("Unable to parse yaml file") 115 | } 116 | 117 | #[derive(Debug, Serialize)] 118 | pub struct CrashInfo { 119 | details: String, 120 | backtrace: String, 121 | } 122 | 123 | /// Invoke to ensure process exits on a thread panic. 124 | /// 125 | /// Tokio's default behavior is to catch panics and ignore them. Invoking this function will 126 | /// ensure that all subsequent thread panics (even Tokio threads) will report the 127 | /// details/backtrace and then exit. 128 | pub fn setup_panic_handler() { 129 | // TODO: remove deprecated lint when new clippy nightly is released 130 | #[allow(deprecated)] 131 | std::panic::set_hook(Box::new(move |pi: &PanicInfo<'_>| { 132 | handle_panic(pi); 133 | })); 134 | } 135 | 136 | // Formats and logs panic information 137 | // TODO: remove deprecated lint when new clippy nightly is released 138 | #[allow(deprecated)] 139 | fn handle_panic(panic_info: &PanicInfo<'_>) { 140 | // The Display formatter for a PanicInfo contains the message, payload and location. 141 | let details = format!("{panic_info}",); 142 | let backtrace = format!("{:#?}", Backtrace::new()); 143 | let info = CrashInfo { details, backtrace }; 144 | let crash_info = toml::to_string_pretty(&info).unwrap(); 145 | error!("{}", crash_info); 146 | // TODO / HACK ALARM: Write crash info synchronously via eprintln! to ensure it is written before the process exits which error! doesn't guarantee. 147 | // This is a workaround until https://github.com/aptos-labs/aptos-core/issues/2038 is resolved. 148 | eprintln!("{crash_info}",); 149 | // Kill the process 150 | process::exit(12); 151 | } 152 | 153 | /// Set up logging for the server. 
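(Editorial aside, an illustrative sketch rather than code from this repo: the pieces above, `ServerArgs`, `GenericConfig`, `RunnableConfig`, and `load`, are typically wired together from a service's `main`. `MyProcessorConfig` is a hypothetical config type and the crate path in the import is an assumption.)

```rust
use anyhow::Result;
// Crate path assumed; adjust to however the SDK is consumed.
use aptos_indexer_processor_sdk::server_framework::{RunnableConfig, ServerArgs};
use clap::Parser;
use serde::{Deserialize, Serialize};

// Hypothetical service-specific configuration; it becomes `server_config` in the YAML.
#[derive(Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
struct MyProcessorConfig {
    postgres_connection_string: String,
}

#[async_trait::async_trait]
impl RunnableConfig for MyProcessorConfig {
    async fn run(&self) -> Result<()> {
        // Build and run the processor pipeline here.
        Ok(())
    }

    fn get_server_name(&self) -> String {
        "my_processor".to_string()
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let args = ServerArgs::parse();
    // `run` loads the YAML at --config-path into GenericConfig<MyProcessorConfig>,
    // starts the health/metrics endpoints, then calls MyProcessorConfig::run.
    args.run::<MyProcessorConfig>(tokio::runtime::Handle::current())
        .await
}
```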
154 | pub fn setup_logging() { 155 | let env_filter = EnvFilter::try_from_default_env() 156 | .or_else(|_| EnvFilter::try_new("info")) 157 | .unwrap(); 158 | tracing_subscriber::fmt() 159 | .json() 160 | .with_file(true) 161 | .with_line_number(true) 162 | .with_thread_ids(true) 163 | .with_target(false) 164 | .with_thread_names(true) 165 | .with_env_filter(env_filter) 166 | .flatten_event(true) 167 | .init(); 168 | } 169 | 170 | /// Register readiness and liveness probes and set up metrics endpoint. 171 | pub async fn register_probes_and_metrics_handler( 172 | port: u16, 173 | additional_labels: Vec<(String, String)>, 174 | ) { 175 | let mut registry = Registry::with_labels( 176 | additional_labels 177 | .into_iter() 178 | .map(|(k, v)| (k.into(), v.into())), 179 | ); 180 | init_step_metrics_registry(&mut registry); 181 | init_channel_metrics_registry(&mut registry); 182 | AutometricsSettings::builder() 183 | .prometheus_client_registry(registry) 184 | .init(); 185 | 186 | let router = Router::new() 187 | .route("/readiness", get(StatusCode::OK)) 188 | .route("/metrics", get(metrics_handler)); 189 | 190 | #[cfg(target_os = "linux")] 191 | let router = router.merge(Router::new().route("/profilez", get(profilez_handler))); 192 | 193 | let listener = tokio::net::TcpListener::bind(format!("0.0.0.0:{port}",)) 194 | .await 195 | .expect("Failed to bind TCP listener"); 196 | axum::serve(listener, router).await.unwrap(); 197 | } 198 | 199 | async fn metrics_handler() -> impl IntoResponse { 200 | match autometrics::prometheus_exporter::encode_to_string() { 201 | Ok(prometheus_client_rust_metrics) => ( 202 | StatusCode::OK, 203 | [("Content-Type", "text/plain; version=0.0.4")], 204 | prometheus_client_rust_metrics, 205 | ) 206 | .into_response(), 207 | Err(err) => (StatusCode::INTERNAL_SERVER_ERROR, format!("{err:?}",)).into_response(), 208 | } 209 | } 210 | 211 | #[cfg(target_os = "linux")] 212 | async fn profilez_handler() -> impl IntoResponse { 213 | match start_cpu_profiling(10, 99, false).await { 214 | Ok(body) => ( 215 | StatusCode::OK, 216 | [ 217 | ("Content-Length", body.len().to_string()), 218 | ("Content-Disposition", "inline".to_string()), 219 | ("Content-Type", "image/svg+xml".to_string()), 220 | ], 221 | body, 222 | ) 223 | .into_response(), 224 | Err(e) => ( 225 | StatusCode::INTERNAL_SERVER_ERROR, 226 | format!("Profiling failed: {e:?}."), 227 | ) 228 | .into_response(), 229 | } 230 | } 231 | 232 | #[cfg(test)] 233 | mod tests { 234 | use super::*; 235 | use std::io::Write; 236 | use tempfile::tempdir; 237 | 238 | #[derive(Clone, Debug, Deserialize, Serialize)] 239 | #[serde(deny_unknown_fields)] 240 | pub struct TestConfig { 241 | test: u32, 242 | test_name: String, 243 | } 244 | 245 | #[async_trait::async_trait] 246 | impl RunnableConfig for TestConfig { 247 | async fn run(&self) -> Result<()> { 248 | assert_eq!(self.test, 123); 249 | assert_eq!(self.test_name, "test"); 250 | Ok(()) 251 | } 252 | 253 | fn get_server_name(&self) -> String { 254 | self.test_name.clone() 255 | } 256 | } 257 | 258 | #[test] 259 | fn test_random_config_creation() { 260 | let dir = tempdir().expect("tempdir failure"); 261 | 262 | let file_path = dir.path().join("testing_yaml.yaml"); 263 | let mut file = File::create(&file_path).expect("create failure"); 264 | let raw_yaml_content = r#" 265 | health_check_port: 12345 266 | server_config: 267 | test: 123 268 | test_name: "test" 269 | "#; 270 | writeln!(file, "{raw_yaml_content}").expect("write_all failure"); 271 | 272 | let config = 
load::>(&file_path).unwrap(); 273 | assert_eq!(config.health_check_port, 12345); 274 | assert_eq!(config.server_config.test, 123); 275 | assert_eq!(config.server_config.test_name, "test"); 276 | } 277 | 278 | #[test] 279 | fn verify_tool() { 280 | use clap::CommandFactory; 281 | ServerArgs::command().debug_assert() 282 | } 283 | } 284 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod steps; 2 | pub mod utils; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/steps/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod pass_through_step; 2 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/steps/pass_through_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{async_step::AsyncRunType, AsyncStep, NamedStep, Processable}, 3 | types::transaction_context::TransactionContext, 4 | utils::errors::ProcessorError, 5 | }; 6 | use anyhow::Result; 7 | use async_trait::async_trait; 8 | use std::marker::PhantomData; 9 | 10 | pub struct PassThroughStep { 11 | name: Option, 12 | _input: PhantomData, 13 | } 14 | 15 | impl Default for PassThroughStep { 16 | fn default() -> Self { 17 | Self { 18 | name: None, 19 | _input: PhantomData, 20 | } 21 | } 22 | } 23 | 24 | impl PassThroughStep { 25 | pub fn new_named(name: String) -> Self { 26 | Self { 27 | name: Some(name), 28 | _input: PhantomData, 29 | } 30 | } 31 | } 32 | 33 | impl AsyncStep for PassThroughStep {} 34 | 35 | impl NamedStep for PassThroughStep { 36 | fn name(&self) -> String { 37 | self.name 38 | .clone() 39 | .unwrap_or_else(|| "PassThroughStep".to_string()) 40 | } 41 | } 42 | 43 | #[async_trait] 44 | impl Processable for PassThroughStep { 45 | type Input = Input; 46 | type Output = Input; 47 | type RunType = AsyncRunType; 48 | 49 | async fn process( 50 | &mut self, 51 | item: TransactionContext, 52 | ) -> Result>, ProcessorError> { 53 | Ok(Some(item)) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/test/utils.rs: -------------------------------------------------------------------------------- 1 | use instrumented_channel::InstrumentedAsyncReceiver; 2 | use std::time::Duration; 3 | 4 | pub async fn receive_with_timeout( 5 | receiver: &mut InstrumentedAsyncReceiver, 6 | timeout_ms: u64, 7 | ) -> Option { 8 | tokio::time::timeout(Duration::from_millis(timeout_ms), async { 9 | receiver.recv().await 10 | }) 11 | .await 12 | .unwrap() 13 | .ok() 14 | } 15 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/cli_parser.rs: -------------------------------------------------------------------------------- 1 | use once_cell::sync::Lazy; 2 | use std::sync::Mutex; 3 | 4 | #[derive(Debug, Clone)] 5 | pub struct TestArgs { 6 | pub generate_output: bool, 7 | pub output_path: Option, 8 | } 9 | 10 | // Define a global static to store the parsed arguments 11 | static TEST_CONFIG: Lazy> = Lazy::new(|| { 12 | let args = parse_test_args(); 13 | Mutex::new(args) 14 | }); 15 | 16 | // Function to fetch global test args 17 | pub fn get_test_config() -> (bool, 
Option) { 18 | let test_args = TEST_CONFIG.lock().unwrap().clone(); 19 | (test_args.generate_output, test_args.output_path) 20 | } 21 | 22 | pub fn parse_test_args() -> TestArgs { 23 | let raw_args: Vec = std::env::args().collect(); 24 | 25 | // Find the "--" separator, or default to include all args after the test name 26 | let clap_args_position = raw_args.iter().position(|arg| arg == "--"); 27 | 28 | // Determine the starting position for custom arguments 29 | let custom_args_start = match clap_args_position { 30 | Some(position) => position + 1, // Start after the "--" if it exists 31 | None => 1, // Start after the test name, skip the first element 32 | }; 33 | 34 | // Collect custom arguments based on determined start position 35 | let custom_args: Vec = raw_args[custom_args_start..].to_vec(); 36 | 37 | // Manually parse the "generate" flag 38 | let generate_flag = custom_args.contains(&"generate".to_string()); 39 | 40 | // Manually parse the "--output-path" flag and get its associated value 41 | let output_path = custom_args 42 | .windows(2) 43 | .find(|args| args[0] == "output-path") 44 | .map(|args| args[1].clone()); 45 | 46 | println!("Parsed generate flag: {generate_flag}",); 47 | println!( 48 | "Parsed output_path: {}", 49 | output_path.clone().unwrap_or_else(|| "None".to_string()) 50 | ); 51 | 52 | TestArgs { 53 | generate_output: generate_flag, 54 | output_path, 55 | } 56 | } 57 | 58 | #[cfg(test)] 59 | mod tests { 60 | use super::*; 61 | pub fn parse_test_args_from_vec(args: Vec) -> TestArgs { 62 | // Find the "--" separator (if it exists) 63 | let clap_args_position = args.iter().position(|arg| arg == "--"); 64 | 65 | // Only pass the arguments that come after "--", if it exists 66 | let custom_args: Vec = match clap_args_position { 67 | Some(position) => args[position + 1..].to_vec(), // Slice after `--` 68 | None => Vec::new(), // If no `--` is found, treat as no custom args 69 | }; 70 | 71 | // Manually parse the "--generate" flag 72 | let generate_output_flag = custom_args.contains(&"generate".to_string()); 73 | 74 | // Manually parse the "--output-path" flag and get its associated value 75 | let output_path = custom_args 76 | .windows(2) 77 | .find(|args| args[0] == "output-path") 78 | .map(|args| args[1].clone()); 79 | 80 | println!("Parsed generate_output_flag: {generate_output_flag}"); 81 | println!( 82 | "Parsed output_path: {}", 83 | output_path.clone().unwrap_or_else(|| "None".to_string()) 84 | ); 85 | 86 | TestArgs { 87 | generate_output: generate_output_flag, 88 | output_path, 89 | } 90 | } 91 | 92 | #[test] 93 | fn test_parse_generate_output_flag() { 94 | let args = vec![ 95 | "test_binary".to_string(), 96 | "--".to_string(), 97 | "generate".to_string(), 98 | ]; 99 | let parsed = parse_test_args_from_vec(args); 100 | assert!(parsed.generate_output); 101 | assert_eq!(parsed.output_path, None); 102 | } 103 | 104 | #[test] 105 | fn test_parse_output_path() { 106 | let args = vec![ 107 | "test_binary".to_string(), 108 | "--".to_string(), 109 | "output-path".to_string(), 110 | "/some/path".to_string(), 111 | ]; 112 | let parsed = parse_test_args_from_vec(args); 113 | assert!(!parsed.generate_output); 114 | assert_eq!(parsed.output_path, Some("/some/path".to_string())); 115 | } 116 | 117 | #[test] 118 | fn test_parse_both_arguments() { 119 | let args = vec![ 120 | "test_binary".to_string(), 121 | "--".to_string(), 122 | "generate".to_string(), 123 | "output-path".to_string(), 124 | "/some/other/path".to_string(), 125 | ]; 126 | let parsed = 
parse_test_args_from_vec(args); 127 | assert!(parsed.generate_output); 128 | assert_eq!(parsed.output_path, Some("/some/other/path".to_string())); 129 | } 130 | 131 | #[test] 132 | fn test_parse_no_arguments() { 133 | let args = vec!["test_binary".to_string()]; 134 | let parsed = parse_test_args_from_vec(args); 135 | assert!(!parsed.generate_output); 136 | assert_eq!(parsed.output_path, None); 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/database.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use async_trait::async_trait; 3 | use testcontainers::{ 4 | core::{IntoContainerPort, WaitFor}, 5 | runners::AsyncRunner, 6 | ContainerAsync, GenericImage, ImageExt, 7 | }; 8 | 9 | const POSTGRES_IMAGE: &str = "postgres"; 10 | const POSTGRES_VERSION: &str = "14"; 11 | const POSTGRES_PORT: u16 = 5432; 12 | const POSTGRES_DB: &str = "postgres"; 13 | const POSTGRES_USER: &str = "postgres"; 14 | const POSTGRES_PASSWORD: &str = "postgres"; 15 | 16 | #[async_trait] 17 | pub trait TestDatabase: Send + Sync { 18 | /// Set up the test container using user-defined code. 19 | async fn setup<'a>(&'a mut self) -> anyhow::Result<()>; 20 | 21 | /// Retrieve the database connection URL after setup. 22 | fn get_db_url(&self) -> String; 23 | } 24 | 25 | #[derive(Default)] 26 | pub struct PostgresTestDatabase { 27 | connection_string: String, 28 | postgres_container: Option>, 29 | } 30 | 31 | impl PostgresTestDatabase { 32 | pub fn new() -> Self { 33 | PostgresTestDatabase { 34 | postgres_container: None, 35 | connection_string: String::new(), 36 | } 37 | } 38 | 39 | /// Helper method to configure and start the Postgres container. 40 | async fn start_postgres_container(&mut self) -> Result> { 41 | let postgres_image = GenericImage::new(POSTGRES_IMAGE, POSTGRES_VERSION) 42 | .with_exposed_port(POSTGRES_PORT.tcp()) 43 | .with_wait_for(WaitFor::message_on_stderr( 44 | "database system is ready to accept connections", 45 | )) 46 | .with_env_var("POSTGRES_DB", POSTGRES_DB) 47 | .with_env_var("POSTGRES_USER", POSTGRES_USER) 48 | .with_env_var("POSTGRES_PASSWORD", POSTGRES_PASSWORD); 49 | 50 | let container = postgres_image 51 | .start() 52 | .await 53 | .context("Failed to start Postgres container")?; 54 | 55 | Ok(container) 56 | } 57 | 58 | /// Helper method to get the host and port information of the running container. 59 | async fn get_connection_info(&self) -> Result<(String, u16)> { 60 | let host = self 61 | .postgres_container 62 | .as_ref() 63 | .context("Postgres container not initialized")? 64 | .get_host() 65 | .await 66 | .context("Failed to get container host")?; 67 | 68 | let port = self 69 | .postgres_container 70 | .as_ref() 71 | .context("Postgres container not initialized")? 72 | .get_host_port_ipv4(5432) 73 | .await 74 | .context("Failed to get container port")?; 75 | 76 | Ok((host.to_string(), port)) 77 | } 78 | } 79 | 80 | #[async_trait] 81 | impl TestDatabase for PostgresTestDatabase { 82 | /// Set up the Postgres container and get the database connection URL. 
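(Editorial aside: a hedged sketch, not a test from this repo, of how a test might drive `PostgresTestDatabase`; the import path is an assumption.)

```rust
// Path assumed; `TestDatabase` must be in scope for `setup`.
use aptos_indexer_processor_sdk::testing_framework::database::{PostgresTestDatabase, TestDatabase};

#[tokio::test]
async fn gives_a_throwaway_postgres_url() -> anyhow::Result<()> {
    let mut db = PostgresTestDatabase::new();
    // Starts the postgres:14 testcontainer and waits for it to accept connections.
    db.setup().await?;
    // e.g. postgres://postgres:postgres@<host>:<port>/postgres
    let url = db.get_db_url();
    assert!(url.starts_with("postgres://"));
    Ok(())
}
```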
83 | async fn setup(&mut self) -> Result<()> { 84 | self.postgres_container = Some(self.start_postgres_container().await?); 85 | 86 | let (host, port) = self.get_connection_info().await?; 87 | 88 | self.connection_string = format!("postgres://postgres:postgres@{host}:{port}/postgres"); 89 | Ok(()) 90 | } 91 | 92 | /// Retrieve the Postgres connection URL after the container has been set up. 93 | fn get_db_url(&self) -> String { 94 | self.connection_string.clone() 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/mock_grpc.rs: -------------------------------------------------------------------------------- 1 | use aptos_protos::indexer::v1::{ 2 | raw_data_server::{RawData, RawDataServer}, 3 | GetTransactionsRequest, ProcessedRange, TransactionsResponse, 4 | }; 5 | use futures::Stream; 6 | use std::{collections::HashMap, pin::Pin}; 7 | use tokio::time::{timeout, Duration}; 8 | use tokio_stream::wrappers::TcpListenerStream; 9 | use tonic::{transport::Server, Request, Response, Status}; 10 | 11 | // Bind to port 0 to get a random available port 12 | const GRPC_ADDRESS: &str = "127.0.0.1:0"; 13 | 14 | #[derive(Default)] 15 | pub struct MockGrpcServer { 16 | pub transactions_response: Vec, 17 | pub chain_id: u64, 18 | } 19 | 20 | type ResponseStream = Pin> + Send>>; 21 | 22 | #[tonic::async_trait] 23 | impl RawData for MockGrpcServer { 24 | type GetTransactionsStream = ResponseStream; 25 | 26 | async fn get_transactions( 27 | &self, 28 | req: Request, 29 | ) -> Result, Status> { 30 | let request = req.into_inner(); 31 | let starting_version = request.starting_version.unwrap_or(0); // Default to 0 if starting_version is not provided 32 | let transactions_count = request.transactions_count.unwrap_or(1); // Default to 1 if transactions_count is not provided 33 | let mut collected_transactions = Vec::new(); 34 | 35 | let mut transaction_map = HashMap::new(); 36 | for transaction_response in &self.transactions_response { 37 | for tx in &transaction_response.transactions { 38 | transaction_map.insert(tx.version, tx.clone()); 39 | } 40 | } 41 | 42 | let mut sorted_transactions: Vec<_> = transaction_map 43 | .iter() 44 | .filter(|(&version, _)| version >= starting_version) 45 | .map(|(_, tx)| tx.clone()) 46 | .collect(); 47 | sorted_transactions.sort_by_key(|tx| tx.version); 48 | 49 | collected_transactions.extend( 50 | sorted_transactions 51 | .into_iter() 52 | .take(transactions_count as usize), 53 | ); 54 | 55 | let result = if !collected_transactions.is_empty() { 56 | TransactionsResponse { 57 | transactions: collected_transactions, 58 | chain_id: Some(self.chain_id), 59 | processed_range: Some(ProcessedRange { 60 | first_version: starting_version, 61 | last_version: starting_version + transactions_count - 1, 62 | }), 63 | } 64 | } else { 65 | // Return a default response with chain_id if no transactions are found 66 | let mut default_transaction_response = self.transactions_response[0].clone(); 67 | default_transaction_response.chain_id = Some(self.chain_id); 68 | default_transaction_response 69 | }; 70 | 71 | let stream = futures::stream::iter(vec![Ok(result)]); 72 | Ok(Response::new(Box::pin(stream))) 73 | } 74 | } 75 | 76 | impl MockGrpcServer { 77 | pub async fn run(self) -> anyhow::Result { 78 | let listener = tokio::net::TcpListener::bind(GRPC_ADDRESS).await?; 79 | let bound_addr = listener.local_addr()?; // Get the actual bound address 80 | 81 | // Convert the TcpListener into a 
TcpListenerStream (wrapping it with `?` to handle potential errors) 82 | let stream = TcpListenerStream::new(listener); 83 | 84 | // Build and start the gRPC server without graceful shutdown 85 | let server = Server::builder().add_service( 86 | RawDataServer::new(self) 87 | .accept_compressed(tonic::codec::CompressionEncoding::Zstd) // Enable compression for incoming requests 88 | .send_compressed(tonic::codec::CompressionEncoding::Zstd), // Compress outgoing responses 89 | ); 90 | 91 | tokio::spawn(async move { 92 | // This server will run until the process is killed or the task is stopped 93 | let server_timeout = Duration::from_secs(60); 94 | 95 | match timeout(server_timeout, server.serve_with_incoming(stream)).await { 96 | Ok(result) => match result { 97 | Ok(_) => { 98 | println!("Server stopped successfully."); 99 | }, 100 | Err(e) => { 101 | eprintln!("Failed to run gRPC server: {e:?}"); 102 | }, 103 | }, 104 | Err(_) => { 105 | eprintln!("Server timed out and was stopped."); 106 | }, 107 | } 108 | }); 109 | 110 | // Return the port number so it can be used by other parts of the program 111 | let port = bound_addr.port(); 112 | println!("Server is running on port {port}",); 113 | 114 | Ok(port) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/testing_framework/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod cli_parser; 2 | pub mod database; 3 | mod mock_grpc; 4 | pub mod sdk_test_context; 5 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/README.md: -------------------------------------------------------------------------------- 1 | # Traits 2 | 3 | ## Async Step 4 | 5 | The `async_step.rs` file provides tools for handling asynchronous steps in processing. 6 | 7 | Implement `AsyncStep` for steps that process data directly without buffering. 8 | 9 | ## Pollable Async Step 10 | 11 | The `pollable_async_step.rs` file provides tools for handling steps that can be polled asynchronously. 12 | 13 | Implement `PollableAsyncStep` for steps that buffer or poll data over a duration of time in an asynchronous manner. 14 | 15 | ## Processable 16 | The `processable.rs` file defines the `Processable` trait, which each step implements. 17 | 18 | ## Processor trait 19 | The `processor_trait.rs` defines `ProcessorTrait`, which each processor implements. 
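As an illustrative, non-normative sketch, a minimal `AsyncStep` ties these traits together as shown below. `UppercaseStep` is a made-up step, and the import paths assume the SDK exposes its `traits`, `types`, and `utils` modules under the crate root.

```rust
use aptos_indexer_processor_sdk::{
    traits::{async_step::AsyncRunType, AsyncStep, NamedStep, Processable},
    types::transaction_context::TransactionContext,
    utils::errors::ProcessorError,
};
use async_trait::async_trait;

pub struct UppercaseStep;

#[async_trait]
impl Processable for UppercaseStep {
    type Input = String;
    type Output = String;
    type RunType = AsyncRunType;

    async fn process(
        &mut self,
        input: TransactionContext<String>,
    ) -> Result<Option<TransactionContext<String>>, ProcessorError> {
        // Transform the batch's data and pass the metadata through unchanged.
        Ok(Some(TransactionContext {
            data: input.data.to_uppercase(),
            metadata: input.metadata,
        }))
    }
}

// AsyncStep is a marker trait: the step handles each batch immediately, with no polling or buffering.
impl AsyncStep for UppercaseStep {}

impl NamedStep for UppercaseStep {
    fn name(&self) -> String {
        "UppercaseStep".to_string()
    }
}
```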
20 | 21 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/async_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::{ 3 | processable::RunnableStepType, IntoRunnableStep, NamedStep, Processable, RunnableStep, 4 | }, 5 | types::transaction_context::TransactionContext, 6 | utils::step_metrics::{StepMetricLabels, StepMetricsBuilder}, 7 | }; 8 | use async_trait::async_trait; 9 | use bigdecimal::Zero; 10 | use instrumented_channel::{ 11 | instrumented_bounded_channel, InstrumentedAsyncReceiver, InstrumentedAsyncSender, 12 | }; 13 | use std::time::{Duration, Instant}; 14 | use tokio::task::JoinHandle; 15 | use tracing::{error, info, warn}; 16 | 17 | #[async_trait] 18 | pub trait AsyncStep 19 | where 20 | Self: Processable + Send + Sized + 'static, 21 | { 22 | } 23 | 24 | pub struct AsyncRunType; 25 | 26 | impl RunnableStepType for AsyncRunType {} 27 | 28 | pub struct RunnableAsyncStep 29 | where 30 | Step: AsyncStep, 31 | { 32 | pub step: Step, 33 | } 34 | 35 | impl RunnableAsyncStep 36 | where 37 | Step: AsyncStep, 38 | { 39 | pub fn new(step: Step) -> Self { 40 | Self { step } 41 | } 42 | } 43 | 44 | impl NamedStep for RunnableAsyncStep 45 | where 46 | Step: 'static + AsyncStep + Send + Sized, 47 | { 48 | fn name(&self) -> String { 49 | self.step.name() 50 | } 51 | 52 | fn type_name(&self) -> String { 53 | let step_type = std::any::type_name::().to_string(); 54 | format!("{step_type} (via RunnableAsyncStep)",) 55 | } 56 | } 57 | 58 | impl IntoRunnableStep for Step 59 | where 60 | Step: AsyncStep + Send + Sized + 'static, 61 | { 62 | fn into_runnable_step(self) -> impl RunnableStep { 63 | RunnableAsyncStep::new(self) 64 | } 65 | } 66 | 67 | impl RunnableStep for RunnableAsyncStep 68 | where 69 | Step: AsyncStep + Send + Sized + 'static, 70 | { 71 | fn spawn( 72 | self, 73 | input_receiver: Option>>, 74 | output_channel_size: usize, 75 | _input_sender: Option>>, 76 | ) -> ( 77 | InstrumentedAsyncReceiver>, 78 | JoinHandle<()>, 79 | ) { 80 | let mut step = self.step; 81 | let step_name = step.name(); 82 | let input_receiver = input_receiver.expect("Input receiver must be set"); 83 | 84 | let (output_sender, output_receiver) = 85 | instrumented_bounded_channel(&step_name, output_channel_size); 86 | 87 | info!(step_name = step_name, "Spawning processing task"); 88 | let handle = tokio::spawn(async move { 89 | loop { 90 | let input_with_context = match input_receiver.recv().await { 91 | Ok(input_with_context) => input_with_context, 92 | Err(e) => { 93 | // If the previous steps have finished and the channels have closed , we should break out of the loop 94 | warn!( 95 | step_name = step_name, 96 | error = e.to_string(), 97 | "No input received from channel" 98 | ); 99 | break; 100 | }, 101 | }; 102 | let processing_duration = Instant::now(); 103 | let output_with_context = match step.process(input_with_context).await { 104 | Ok(output_with_context) => output_with_context, 105 | Err(e) => { 106 | error!( 107 | step_name = step_name, 108 | error = e.to_string(), 109 | "Failed to process input" 110 | ); 111 | break; 112 | }, 113 | }; 114 | if let Some(output_with_context) = output_with_context { 115 | match StepMetricsBuilder::default() 116 | .labels(StepMetricLabels { 117 | step_name: step.name(), 118 | }) 119 | .latest_processed_version(output_with_context.metadata.end_version) 120 | .processed_transaction_latency( 121 | 
output_with_context.get_transaction_latency(), 122 | ) 123 | .latest_transaction_timestamp( 124 | output_with_context.get_start_transaction_timestamp_unix(), 125 | ) 126 | .num_transactions_processed_count( 127 | output_with_context.get_num_transactions(), 128 | ) 129 | .processing_duration_in_secs(processing_duration.elapsed().as_secs_f64()) 130 | .processed_size_in_bytes(output_with_context.metadata.total_size_in_bytes) 131 | .build() 132 | { 133 | Ok(mut metrics) => metrics.log_metrics(), 134 | Err(e) => { 135 | error!( 136 | step_name = step_name, 137 | error = e.to_string(), 138 | "Failed to log metrics" 139 | ); 140 | break; 141 | }, 142 | } 143 | match output_sender.send(output_with_context).await { 144 | Ok(_) => (), 145 | Err(e) => { 146 | error!( 147 | step_name = step_name, 148 | error = e.to_string(), 149 | "Error sending output to channel" 150 | ); 151 | break; 152 | }, 153 | } 154 | } 155 | } 156 | 157 | // Wait for output channel to be empty before ending the task and closing the send channel 158 | loop { 159 | let channel_size = output_sender.len(); 160 | info!( 161 | step_name = step_name, 162 | channel_size = channel_size, 163 | "Waiting for output channel to be empty" 164 | ); 165 | if channel_size.is_zero() { 166 | break; 167 | } 168 | tokio::time::sleep(Duration::from_millis(100)).await; 169 | } 170 | info!( 171 | step_name = step_name, 172 | "Output channel is empty. Closing send channel." 173 | ); 174 | }); 175 | 176 | (output_receiver, handle) 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/instrumentation.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | pub trait NamedStep { 4 | fn name(&self) -> String; 5 | 6 | fn type_name(&self) -> String { 7 | std::any::type_name::().to_string() 8 | } 9 | } 10 | 11 | pub struct StepInstrumentor 12 | where 13 | Step: NamedStep + Send + Sized + 'static, 14 | { 15 | _step: PhantomData, 16 | } 17 | 18 | impl Default for StepInstrumentor 19 | where 20 | Step: NamedStep + Send + Sized + 'static, 21 | { 22 | fn default() -> Self { 23 | Self { 24 | _step: Default::default(), 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/into_runnable_step.rs: -------------------------------------------------------------------------------- 1 | use crate::traits::{Processable, RunnableStep}; 2 | 3 | pub trait IntoRunnableStep< 4 | Input, 5 | Output, 6 | Step: Processable, 7 | RunnableType = ::RunType, 8 | > where 9 | Self: Send + Sized + 'static, 10 | Input: Send + 'static, 11 | Output: Send + 'static, 12 | { 13 | fn into_runnable_step(self) -> impl RunnableStep; 14 | } 15 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod async_step; 2 | pub mod instrumentation; 3 | pub mod into_runnable_step; 4 | pub mod pollable_async_step; 5 | pub mod processable; 6 | pub mod processor_trait; 7 | pub mod runnable_step; 8 | 9 | // Re-export the structs and traits 10 | pub use async_step::{AsyncRunType, AsyncStep, RunnableAsyncStep}; 11 | pub use instrumentation::NamedStep; 12 | pub use into_runnable_step::IntoRunnableStep; 13 | pub use pollable_async_step::{PollableAsyncRunType, PollableAsyncStep, 
RunnablePollableStep}; 14 | pub use processable::{Processable, RunnableStepType}; 15 | pub use runnable_step::{RunnableStep, RunnableStepWithInputReceiver}; 16 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/processable.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | traits::NamedStep, types::transaction_context::TransactionContext, 3 | utils::errors::ProcessorError, 4 | }; 5 | use anyhow::Result; 6 | use async_trait::async_trait; 7 | 8 | /// Trait to convince the compiler that different step types are mutually exclusive 9 | pub trait RunnableStepType {} 10 | 11 | // This is a dummy implementation for the unit type 12 | impl RunnableStepType for () {} 13 | 14 | #[async_trait] 15 | pub trait Processable 16 | where 17 | Self: NamedStep + Send + Sized + 'static, 18 | { 19 | type Input: Send + 'static; 20 | type Output: Send + 'static; 21 | // This is to convince the compiler of mutual exclusivity of different step impls 22 | type RunType: RunnableStepType; 23 | 24 | /// Lifecycle methods 25 | async fn init(&mut self) {} 26 | async fn cleanup( 27 | &mut self, 28 | ) -> Result>>, ProcessorError> { 29 | Ok(None) 30 | } 31 | 32 | /// Processes a batch of input items and returns a batch of output items. 33 | async fn process( 34 | &mut self, 35 | items: TransactionContext, 36 | ) -> Result>, ProcessorError>; 37 | } 38 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/processor_trait.rs: -------------------------------------------------------------------------------- 1 | use async_trait::async_trait; 2 | 3 | #[async_trait] 4 | pub trait ProcessorTrait: Send + Sync { 5 | fn name(&self) -> &'static str; 6 | async fn run_processor(&self) -> anyhow::Result<()>; 7 | } 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/traits/runnable_step.rs: -------------------------------------------------------------------------------- 1 | use crate::{traits::NamedStep, types::transaction_context::TransactionContext}; 2 | use instrumented_channel::{InstrumentedAsyncReceiver, InstrumentedAsyncSender}; 3 | use std::marker::PhantomData; 4 | use tokio::task::JoinHandle; 5 | 6 | pub trait RunnableStep: NamedStep 7 | where 8 | Self: Send + Sized + 'static, 9 | Input: Send + 'static, 10 | Output: Send + 'static, 11 | { 12 | #[allow(clippy::too_long_first_doc_paragraph)] 13 | /// Runs the step, forever, with the given input receiver and returns the output receiver and the join handle. 
14 | fn spawn( 15 | self, 16 | input_receiver: Option>>, 17 | output_channel_size: usize, 18 | _input_sender: Option>>, 19 | ) -> ( 20 | InstrumentedAsyncReceiver>, 21 | JoinHandle<()>, 22 | ); 23 | 24 | fn add_input_receiver( 25 | self, 26 | input_receiver: InstrumentedAsyncReceiver>, 27 | ) -> RunnableStepWithInputReceiver { 28 | RunnableStepWithInputReceiver::new(input_receiver, self) 29 | } 30 | 31 | fn type_name(&self) -> String { 32 | ::type_name(self) 33 | } 34 | } 35 | 36 | pub struct RunnableStepWithInputReceiver 37 | where 38 | Input: Send + 'static, 39 | Output: Send + 'static, 40 | Step: RunnableStep, 41 | { 42 | pub input_receiver: InstrumentedAsyncReceiver>, 43 | pub step: Step, 44 | _output: PhantomData, 45 | pub _input_sender: Option>>, 46 | } 47 | 48 | impl RunnableStepWithInputReceiver 49 | where 50 | Input: Send + 'static, 51 | Output: Send + 'static, 52 | Step: RunnableStep, 53 | { 54 | pub fn new( 55 | input_receiver: InstrumentedAsyncReceiver>, 56 | step: Step, 57 | ) -> Self { 58 | Self { 59 | input_receiver, 60 | step, 61 | _output: Default::default(), 62 | _input_sender: None, 63 | } 64 | } 65 | 66 | #[allow(clippy::too_long_first_doc_paragraph)] 67 | /// This should only be used for the inputless first step to keep the async sender in scope so the channel stays alive. 68 | pub fn add_input_sender( 69 | mut self, 70 | _input_sender: InstrumentedAsyncSender>, 71 | ) -> Self { 72 | self._input_sender = Some(_input_sender); 73 | self 74 | } 75 | } 76 | 77 | impl NamedStep for RunnableStepWithInputReceiver 78 | where 79 | Input: 'static + Send, 80 | Output: 'static + Send, 81 | Step: RunnableStep, 82 | { 83 | fn name(&self) -> String { 84 | self.step.name() 85 | } 86 | 87 | fn type_name(&self) -> String { 88 | format!( 89 | "{} (via RunnableStepWithInputReceiver)", 90 | RunnableStep::type_name(&self.step) 91 | ) 92 | } 93 | } 94 | 95 | impl RunnableStep 96 | for RunnableStepWithInputReceiver 97 | where 98 | Input: Send + 'static, 99 | Output: Send + 'static, 100 | Step: RunnableStep, 101 | { 102 | fn spawn( 103 | self, 104 | input_receiver: Option>>, 105 | channel_size: usize, 106 | _input_sender: Option>>, 107 | ) -> ( 108 | InstrumentedAsyncReceiver>, 109 | JoinHandle<()>, 110 | ) { 111 | if input_receiver.is_some() { 112 | panic!("Input receiver already set for {:?}", self.name()); 113 | } 114 | self.step 115 | .spawn(Some(self.input_receiver), channel_size, _input_sender) 116 | } 117 | 118 | fn add_input_receiver( 119 | self, 120 | _input_receiver: InstrumentedAsyncReceiver>, 121 | ) -> RunnableStepWithInputReceiver { 122 | panic!("Input receiver already set for {:?}", self.name()); 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/types/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod transaction_context; 2 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/types/transaction_context.rs: -------------------------------------------------------------------------------- 1 | use aptos_indexer_transaction_stream::utils::time::{ 2 | time_diff_since_pb_timestamp_in_secs, timestamp_to_unixtime, 3 | }; 4 | 5 | /// Contains processed data and associated transaction metadata. 
6 | /// 7 | /// The processed data is extracted from transactions and the 8 | /// TransactionContext contains additional metadata about which transactions the extracted 9 | /// data originated from. The metadata is used for metrics and logging purposes. 10 | #[derive(Clone, Default)] 11 | pub struct TransactionContext { 12 | pub data: T, 13 | pub metadata: TransactionMetadata, 14 | } 15 | 16 | impl TransactionContext { 17 | pub fn get_num_transactions(&self) -> u64 { 18 | self.metadata.end_version - self.metadata.start_version + 1 19 | } 20 | 21 | pub fn get_start_transaction_timestamp_unix(&self) -> Option { 22 | self.metadata 23 | .start_transaction_timestamp 24 | .as_ref() 25 | .map(timestamp_to_unixtime) 26 | } 27 | 28 | pub fn get_transaction_latency(&self) -> Option { 29 | self.metadata 30 | .start_transaction_timestamp 31 | .as_ref() 32 | .map(time_diff_since_pb_timestamp_in_secs) 33 | } 34 | } 35 | 36 | impl Ord for TransactionContext { 37 | fn cmp(&self, other: &Self) -> std::cmp::Ordering { 38 | self.metadata 39 | .start_version 40 | .cmp(&other.metadata.start_version) 41 | } 42 | } 43 | 44 | impl PartialOrd for TransactionContext { 45 | fn partial_cmp(&self, other: &Self) -> Option { 46 | Some(self.cmp(other)) 47 | } 48 | } 49 | 50 | impl Eq for TransactionContext {} 51 | 52 | impl PartialEq for TransactionContext { 53 | fn eq(&self, other: &Self) -> bool { 54 | self.metadata.start_version == other.metadata.start_version 55 | } 56 | } 57 | 58 | // Metadata about a batch of transactions 59 | #[derive(Clone, Default)] 60 | pub struct TransactionMetadata { 61 | pub start_version: u64, 62 | pub end_version: u64, 63 | pub start_transaction_timestamp: Option, 64 | pub end_transaction_timestamp: Option, 65 | pub total_size_in_bytes: u64, 66 | } 67 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utils 2 | 3 | ## Chain ID Check 4 | 5 | The `chain_id_check.rs` file provides tools to manage and verify the chain ID during processing. It helps to ensure the processor is indexing the correct chain ID. 6 | 7 | ### ChainIdChecker Trait 8 | 9 | This trait has two main functions that need to be implemented: 10 | 11 | - `save_chain_id`: Saves the current chain ID to storage. 12 | - `get_chain_id`: Retrieves the chain ID from storage. 13 | 14 | 15 | ### `check_or_update_chain_id` Function 16 | 17 | This function checks if the chain ID from a `TransactionStream` matches the one in storage. If they match, processing continues. If not, it updates the storage with the new chain ID. This helps prevent processing errors due to mismatched chain IDs. 18 | 19 | Use this function in your processor to manage the chain ID. 20 | 21 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/chain_id_check.rs: -------------------------------------------------------------------------------- 1 | use super::errors::ProcessorError; 2 | use anyhow::Result; 3 | use aptos_indexer_transaction_stream::{TransactionStream, TransactionStreamConfig}; 4 | use async_trait::async_trait; 5 | use tracing::info; 6 | 7 | #[async_trait] 8 | pub trait ChainIdChecker { 9 | /// Save the chain ID to storage. This is used to track the chain ID that's being processed 10 | /// and prevents the processor from processing the wrong chain. 
11 | async fn save_chain_id(&self, chain_id: u64) -> Result<()>; 12 | 13 | /// Get the chain ID from storage. This is used to track the chain ID that's being processed 14 | /// and prevents the processor from processing the wrong chain. 15 | async fn get_chain_id(&self) -> Result>; 16 | } 17 | 18 | /// Verify the chain id from TransactionStream against the database. 19 | pub async fn check_or_update_chain_id( 20 | transaction_stream_config: &TransactionStreamConfig, 21 | chain_id_checker: &T, 22 | ) -> Result 23 | where 24 | T: ChainIdChecker, 25 | { 26 | info!("Checking if chain id is correct"); 27 | let maybe_existing_chain_id = 28 | chain_id_checker 29 | .get_chain_id() 30 | .await 31 | .map_err(|e| ProcessorError::ChainIdCheckError { 32 | message: format!("Error getting chain id from db: {e:?}"), 33 | })?; 34 | 35 | let transaction_stream = TransactionStream::new(transaction_stream_config.clone()) 36 | .await 37 | .map_err(|e| ProcessorError::ChainIdCheckError { 38 | message: format!("Error initializing transaction stream: {e:?}"), 39 | })?; 40 | let grpc_chain_id = 41 | transaction_stream 42 | .get_chain_id() 43 | .await 44 | .map_err(|e| ProcessorError::ChainIdCheckError { 45 | message: format!("Error getting chain id from transaction stream: {e:?}"), 46 | })?; 47 | 48 | match maybe_existing_chain_id { 49 | Some(chain_id) => { 50 | if chain_id != grpc_chain_id { 51 | return Err(ProcessorError::ChainIdCheckError { 52 | message: format!( 53 | "Wrong chain id detected! Trying to index chain {grpc_chain_id} now but existing data is for chain {chain_id}", 54 | ), 55 | }); 56 | } 57 | 58 | info!( 59 | chain_id = chain_id, 60 | "Chain id matches! Continue to index...", 61 | ); 62 | Ok(chain_id) 63 | }, 64 | None => { 65 | info!( 66 | chain_id = grpc_chain_id, 67 | "Saving chain id to db, continue to index..." 68 | ); 69 | chain_id_checker 70 | .save_chain_id(grpc_chain_id) 71 | .await 72 | .map_err(|e| ProcessorError::ChainIdCheckError { 73 | message: format!("Error saving chain id to db: {e:?}"), 74 | })?; 75 | Ok(grpc_chain_id) 76 | }, 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/constants.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Constant values useful for indexing. 5 | 6 | use once_cell::sync::Lazy; 7 | 8 | /// Type string for AptosCoin. 9 | pub const APTOS_COIN_TYPE_STR: &str = "0x1::aptos_coin::AptosCoin"; 10 | 11 | pub static APT_METADATA_ADDRESS_RAW: Lazy<[u8; 32]> = Lazy::new(|| { 12 | let mut addr = [0u8; 32]; 13 | addr[31] = 10u8; 14 | addr 15 | }); 16 | 17 | pub static APT_METADATA_ADDRESS_HEX: Lazy = 18 | Lazy::new(|| format!("0x{}", hex::encode(*APT_METADATA_ADDRESS_RAW))); 19 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/convert.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Helpers related to basic conversion like string manipulation, converting between 5 | //! number types, BCS, and hashing. 
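(Editorial aside: a sketch of the expected behavior of the helpers defined in this module, written as if it lived alongside them; it is not a test taken from the repo.)

```rust
#[test]
fn conversion_helper_expectations() {
    // standardize_address left-pads to 64 hex characters behind a 0x prefix.
    assert_eq!(standardize_address("0x1"), format!("0x{:0>64}", "1"));
    // truncate_str keeps at most the requested number of characters.
    assert_eq!(truncate_str("0x1::aptos_coin::AptosCoin", 4), "0x1:");
    // hex_to_raw_bytes tolerates an optional 0x prefix.
    assert_eq!(hex_to_raw_bytes("0x0102").unwrap(), vec![0x01, 0x02]);
}
```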
6 | 7 | use bigdecimal::{BigDecimal, Signed, ToPrimitive, Zero}; 8 | use serde::{Deserialize, Deserializer}; 9 | use serde_json::Value; 10 | use std::str::FromStr; 11 | use tiny_keccak::{Hasher, Sha3}; 12 | 13 | #[allow(clippy::too_long_first_doc_paragraph)] 14 | /// Standardizes an address / table handle to be a string with length 66 (0x+64 length hex string). 15 | pub fn standardize_address(handle: &str) -> String { 16 | if let Some(handle) = handle.strip_prefix("0x") { 17 | format!("0x{handle:0>64}") 18 | } else { 19 | format!("0x{handle:0>64}") 20 | } 21 | } 22 | 23 | #[allow(clippy::too_long_first_doc_paragraph)] 24 | /// Standardizes an address / table handle to be a string with length 66 (0x+64 length hex string). 25 | pub fn standardize_address_from_bytes(bytes: &[u8]) -> String { 26 | let encoded_bytes = hex::encode(bytes); 27 | standardize_address(&encoded_bytes) 28 | } 29 | 30 | /// Convert a hex string into a raw byte string. Any leading 0x will be stripped. 31 | pub fn hex_to_raw_bytes(val: &str) -> anyhow::Result> { 32 | Ok(hex::decode(val.strip_prefix("0x").unwrap_or(val))?) 33 | } 34 | 35 | /// Truncate a string to a maximum number of characters. 36 | pub fn truncate_str(val: &str, max_chars: usize) -> String { 37 | let mut trunc = val.to_string(); 38 | trunc.truncate(max_chars); 39 | trunc 40 | } 41 | 42 | pub fn sha3_256(buffer: &[u8]) -> [u8; 32] { 43 | let mut output = [0; 32]; 44 | let mut sha3 = Sha3::v256(); 45 | sha3.update(buffer); 46 | sha3.finalize(&mut output); 47 | output 48 | } 49 | 50 | pub fn u64_to_bigdecimal(val: u64) -> BigDecimal { 51 | BigDecimal::from(val) 52 | } 53 | 54 | pub fn bigdecimal_to_u64(val: &BigDecimal) -> u64 { 55 | val.to_u64().expect("Unable to convert big decimal to u64") 56 | } 57 | 58 | pub fn ensure_not_negative(val: BigDecimal) -> BigDecimal { 59 | if val.is_negative() { 60 | return BigDecimal::zero(); 61 | } 62 | val 63 | } 64 | 65 | /// Remove null bytes from a JSON object. 
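(Editorial aside: a hedged sketch of what the function defined just below guarantees; the `Row` type is hypothetical.)

```rust
#[derive(serde::Serialize, serde::Deserialize)]
struct Row {
    name: String,
}

#[test]
fn null_bytes_are_stripped() {
    // Null bytes inside string fields are removed before the value is stored.
    let cleaned = remove_null_bytes(&Row {
        name: "abc\u{0000}def".to_string(),
    });
    assert_eq!(cleaned.name, "abcdef");
}
```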
66 | pub fn remove_null_bytes serde::Deserialize<'de>>(input: &T) -> T { 67 | let mut txn_json = serde_json::to_value(input).unwrap(); 68 | recurse_remove_null_bytes_from_json(&mut txn_json); 69 | serde_json::from_value::(txn_json).unwrap() 70 | } 71 | 72 | fn recurse_remove_null_bytes_from_json(sub_json: &mut Value) { 73 | match sub_json { 74 | Value::Array(array) => { 75 | for item in array { 76 | recurse_remove_null_bytes_from_json(item); 77 | } 78 | }, 79 | Value::Object(object) => { 80 | for (_key, value) in object { 81 | recurse_remove_null_bytes_from_json(value); 82 | } 83 | }, 84 | Value::String(str) => { 85 | if !str.is_empty() { 86 | let replacement = string_null_byte_replacement(str); 87 | *str = replacement; 88 | } 89 | }, 90 | _ => {}, 91 | } 92 | } 93 | 94 | fn string_null_byte_replacement(value: &str) -> String { 95 | value.replace('\u{0000}', "").replace("\\u0000", "") 96 | } 97 | 98 | pub fn deserialize_string_from_hexstring<'de, D>( 99 | deserializer: D, 100 | ) -> core::result::Result 101 | where 102 | D: Deserializer<'de>, 103 | { 104 | let s = ::deserialize(deserializer)?; 105 | Ok(String::from_utf8(hex_to_raw_bytes(&s).unwrap()).unwrap_or(s)) 106 | } 107 | 108 | /// Deserialize from string to type T 109 | pub fn deserialize_from_string<'de, D, T>(deserializer: D) -> Result 110 | where 111 | D: Deserializer<'de>, 112 | T: FromStr, 113 | ::Err: std::fmt::Display, 114 | { 115 | use serde::de::Error; 116 | 117 | let s = ::deserialize(deserializer)?; 118 | s.parse::().map_err(D::Error::custom) 119 | } 120 | 121 | /// Convert the bcs serialized vector to its original string format 122 | pub fn convert_bcs_hex(typ: String, value: String) -> Option { 123 | let decoded = hex::decode(value.strip_prefix("0x").unwrap_or(&*value)).ok()?; 124 | 125 | match typ.as_str() { 126 | "0x1::string::String" => bcs::from_bytes::(decoded.as_slice()), 127 | "u8" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 128 | "u64" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 129 | "u128" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 130 | "bool" => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 131 | "address" => bcs::from_bytes::(decoded.as_slice()).map(|e| format!("0x{e}")), 132 | _ => Ok(value), 133 | } 134 | .ok() 135 | } 136 | 137 | /// Convert the bcs serialized vector to its original string format for token v2 property map. 
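(Editorial aside: an illustrative check of `convert_bcs_hex` above; the byte string is a hand-written BCS encoding of `1u64`, chosen for illustration only.)

```rust
#[test]
fn decodes_a_bcs_u64() {
    // 1u64 BCS-encodes to eight little-endian bytes: 0x0100000000000000.
    assert_eq!(
        convert_bcs_hex("u64".to_string(), "0x0100000000000000".to_string()),
        Some("1".to_string())
    );
}
```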
138 | pub fn convert_bcs_hex_new(typ: u8, value: String) -> Option { 139 | let decoded = hex::decode(value.strip_prefix("0x").unwrap_or(&*value)).ok()?; 140 | 141 | match typ { 142 | 0 /* bool */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 143 | 1 /* u8 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 144 | 2 /* u16 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 145 | 3 /* u32 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 146 | 4 /* u64 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 147 | 5 /* u128 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 148 | 6 /* u256 */ => bcs::from_bytes::(decoded.as_slice()).map(|e| e.to_string()), 149 | 7 /* address */ => bcs::from_bytes::(decoded.as_slice()).map(|e| format!("0x{e}")), 150 | 8 /* byte_vector */ => bcs::from_bytes::>(decoded.as_slice()).map(|e| format!("0x{}", hex::encode(e))), 151 | 9 /* string */ => bcs::from_bytes::(decoded.as_slice()), 152 | _ => Ok(value), 153 | } 154 | .ok() 155 | } 156 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/errors.rs: -------------------------------------------------------------------------------- 1 | use thiserror::Error; 2 | 3 | #[derive(Error, Debug)] 4 | pub enum ProcessorError { 5 | #[error("Step Init Error: {message}")] 6 | StepInitError { message: String }, 7 | #[error("Process Error: {message}")] 8 | ProcessError { message: String }, 9 | #[error("Poll Error: {message}")] 10 | PollError { message: String }, 11 | #[error("DB Store Error: {message}, Query: {query:?}")] 12 | DBStoreError { 13 | message: String, 14 | query: Option, 15 | }, 16 | #[error("Chain ID Check Error: {message}")] 17 | ChainIdCheckError { message: String }, 18 | } 19 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod chain_id_check; 2 | pub mod constants; 3 | pub mod convert; 4 | pub mod errors; 5 | pub mod extract; 6 | pub mod property_map; 7 | pub mod step_metrics; 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/sdk/src/utils/property_map.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | use crate::utils::convert::{convert_bcs_hex, convert_bcs_hex_new}; 5 | use ahash::AHashMap; 6 | use indexmap::IndexMap; 7 | use serde::{Deserialize, Serialize}; 8 | use serde_json::{Result, Value}; 9 | #[derive(Serialize, Deserialize, Debug, Clone)] 10 | pub struct PropertyValue { 11 | value: String, 12 | typ: String, 13 | } 14 | 15 | pub fn create_property_value(typ: String, value: String) -> Result { 16 | Ok(PropertyValue { 17 | value: convert_bcs_hex(typ.clone(), value.clone()).unwrap_or(value), 18 | typ, 19 | }) 20 | } 21 | 22 | #[derive(Serialize, Deserialize, Debug, Clone)] 23 | pub struct PropertyMap { 24 | data: IndexMap, 25 | } 26 | 27 | impl PropertyMap { 28 | /// Deserializes PropertyValue from bcs encoded json 29 | pub fn from_bcs_encode_str(val: Value) -> Option { 30 | let mut pm = PropertyMap { 31 | data: IndexMap::new(), 32 | }; 33 | let records: &Vec = val.get("map")?.get("data")?.as_array()?; 34 | for entry in records { 35 | let key = entry.get("key")?.as_str()?; 36 | let 
val = entry.get("value")?.get("value")?.as_str()?; 37 | let typ = entry.get("value")?.get("type")?.as_str()?; 38 | let pv = create_property_value(typ.to_string(), val.to_string()).ok()?; 39 | pm.data.insert(key.to_string(), pv); 40 | } 41 | Some(Self::to_flat_json(pm)) 42 | } 43 | 44 | /// Flattens PropertyMap, which can't be easily consumed by downstream. 45 | /// For example: Object {"data": Object {"creation_time_sec": Object {"value": String("1666125588")}}} 46 | /// becomes Object {"creation_time_sec": "1666125588"} 47 | fn to_flat_json(val: PropertyMap) -> Value { 48 | let mut map = AHashMap::new(); 49 | for (k, v) in val.data { 50 | map.insert(k, v.value); 51 | } 52 | serde_json::to_value(map).unwrap() 53 | } 54 | } 55 | 56 | #[derive(Serialize, Deserialize, Debug, Clone)] 57 | pub struct TokenObjectPropertyValue { 58 | value: String, 59 | typ: u8, 60 | } 61 | 62 | pub fn create_token_object_property_value( 63 | typ: u8, 64 | value: String, 65 | ) -> Result<TokenObjectPropertyValue> { 66 | Ok(TokenObjectPropertyValue { 67 | value: convert_bcs_hex_new(typ, value.clone()).unwrap_or(value), 68 | typ, 69 | }) 70 | } 71 | 72 | #[derive(Serialize, Deserialize, Debug, Clone)] 73 | pub struct TokenObjectPropertyMap { 74 | data: IndexMap<String, TokenObjectPropertyValue>, 75 | } 76 | 77 | impl TokenObjectPropertyMap { 78 | /// Deserializes PropertyValue from bcs encoded json 79 | pub fn from_bcs_encode_str(val: Value) -> Option<Value> { 80 | let mut pm = TokenObjectPropertyMap { 81 | data: IndexMap::new(), 82 | }; 83 | let records: &Vec<Value> = val.get("data")?.as_array()?; 84 | for entry in records { 85 | let key = entry.get("key")?.as_str()?; 86 | let val = entry.get("value")?.get("value")?.as_str()?; 87 | let typ = entry.get("value")?.get("type")?.as_u64()?; 88 | let pv = create_token_object_property_value(typ as u8, val.to_string()).ok()?; 89 | pm.data.insert(key.to_string(), pv); 90 | } 91 | Some(Self::to_flat_json_new(pm)) 92 | } 93 | 94 | /// Flattens PropertyMap, which can't be easily consumed by downstream.
95 | /// For example: Object {"data": Object {"creation_time_sec": Object {"value": String("1666125588")}}} 96 | /// becomes Object {"creation_time_sec": "1666125588"} 97 | fn to_flat_json_new(val: TokenObjectPropertyMap) -> Value { 98 | let mut map = IndexMap::new(); 99 | for (k, v) in val.data { 100 | map.insert(k, v.value); 101 | } 102 | serde_json::to_value(map).unwrap() 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "aptos-indexer-transaction-stream" 3 | version = "0.1.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | 15 | [dependencies] 16 | anyhow = { workspace = true } 17 | aptos-moving-average = { workspace = true } 18 | aptos-protos = { workspace = true } 19 | aptos-transaction-filter = { workspace = true } 20 | chrono = { workspace = true } 21 | futures-util = { workspace = true } 22 | once_cell = { workspace = true } 23 | prometheus = { workspace = true } 24 | prost = { workspace = true } 25 | sample = { workspace = true } 26 | serde = { workspace = true } 27 | tokio = { workspace = true } 28 | tonic = { workspace = true } 29 | tracing = { workspace = true } 30 | url = { workspace = true } 31 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/config.rs: -------------------------------------------------------------------------------- 1 | use crate::utils::additional_headers::AdditionalHeaders; 2 | use aptos_transaction_filter::BooleanTransactionFilter; 3 | use serde::{Deserialize, Serialize}; 4 | use std::time::Duration; 5 | use url::Url; 6 | 7 | #[derive(Clone, Debug, Deserialize, Serialize)] 8 | #[serde(deny_unknown_fields)] 9 | pub struct TransactionStreamConfig { 10 | pub indexer_grpc_data_service_address: Url, 11 | pub starting_version: Option<u64>, 12 | pub request_ending_version: Option<u64>, 13 | pub auth_token: String, 14 | pub request_name_header: String, 15 | #[serde(default)] 16 | pub additional_headers: AdditionalHeaders, 17 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_http2_ping_interval")] 18 | pub indexer_grpc_http2_ping_interval_secs: u64, 19 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_http2_ping_timeout")] 20 | pub indexer_grpc_http2_ping_timeout_secs: u64, 21 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_reconnection_timeout")] 22 | pub indexer_grpc_reconnection_timeout_secs: u64, 23 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_response_item_timeout")] 24 | pub indexer_grpc_response_item_timeout_secs: u64, 25 | #[serde(default = "TransactionStreamConfig::default_indexer_grpc_reconnection_max_retries")] 26 | pub indexer_grpc_reconnection_max_retries: u64, 27 | #[serde(default)] 28 | pub transaction_filter: Option<BooleanTransactionFilter>, 29 | } 30 | 31 | impl TransactionStreamConfig { 32 | pub const fn indexer_grpc_http2_ping_interval(&self) -> Duration { 33 | Duration::from_secs(self.indexer_grpc_http2_ping_interval_secs) 34 | } 35 | 36 | pub const fn
indexer_grpc_http2_ping_timeout(&self) -> Duration { 37 | Duration::from_secs(self.indexer_grpc_http2_ping_timeout_secs) 38 | } 39 | 40 | pub const fn indexer_grpc_reconnection_timeout(&self) -> Duration { 41 | Duration::from_secs(self.indexer_grpc_reconnection_timeout_secs) 42 | } 43 | 44 | pub const fn indexer_grpc_response_item_timeout(&self) -> Duration { 45 | Duration::from_secs(self.indexer_grpc_response_item_timeout_secs) 46 | } 47 | 48 | /// Indexer GRPC http2 ping interval in seconds. Defaults to 30. 49 | /// Tonic ref: https://docs.rs/tonic/latest/tonic/transport/channel/struct.Endpoint.html#method.http2_keep_alive_interval 50 | pub const fn default_indexer_grpc_http2_ping_interval() -> u64 { 51 | 30 52 | } 53 | 54 | /// Indexer GRPC http2 ping timeout in seconds. Defaults to 10. 55 | pub const fn default_indexer_grpc_http2_ping_timeout() -> u64 { 56 | 10 57 | } 58 | 59 | /// Default timeout for establishing a grpc connection. Defaults to 5 seconds. 60 | pub const fn default_indexer_grpc_reconnection_timeout() -> u64 { 61 | 5 62 | } 63 | 64 | /// Default timeout for receiving an item from grpc stream. Defaults to 60 seconds. 65 | pub const fn default_indexer_grpc_response_item_timeout() -> u64 { 66 | 60 67 | } 68 | 69 | /// Default max retries for reconnecting to grpc. Defaults to 5. 70 | pub const fn default_indexer_grpc_reconnection_max_retries() -> u64 { 71 | 5 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod config; 2 | pub mod transaction_stream; 3 | pub mod utils; 4 | 5 | pub use aptos_transaction_filter::*; 6 | pub use config::TransactionStreamConfig; 7 | pub use transaction_stream::{TransactionStream, TransactionsPBResponse}; 8 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/additional_headers.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{Context, Result}; 2 | use serde::{Deserialize, Serialize}; 3 | use std::{collections::HashMap, str::FromStr}; 4 | use tonic::metadata::{Ascii, MetadataKey, MetadataMap, MetadataValue}; 5 | 6 | #[allow(clippy::too_long_first_doc_paragraph)] 7 | /// This struct holds additional headers that we attach to the request metadata. 8 | /// Regarding serde, we just serialize this as we would a HashMap. 9 | /// Similarly, we expect that format when deserializing. 10 | /// 11 | /// It is necessary to use HashMap because there is no extend method on MetadataMap 12 | /// itself, nor does it implement Serialize / Deserialize. It is better to parse once 13 | /// here right at config validation time anyway; it exposes any error as early as 14 | /// possible and saves us from parsing (perhaps multiple times) later. 15 | #[derive(Clone, Debug, Default, Serialize, Deserialize)] 16 | #[serde(try_from = "HashMap<String, String>")] 17 | #[serde(into = "HashMap<String, String>")] 18 | pub struct AdditionalHeaders(HashMap<MetadataKey<Ascii>, MetadataValue<Ascii>>); 19 | 20 | impl AdditionalHeaders { 21 | pub fn drain_into_metadata_map(self, metadata_map: &mut MetadataMap) { 22 | for (key, value) in self.0 { 23 | metadata_map.insert(key, value); 24 | } 25 | } 26 | } 27 | 28 | impl TryFrom<HashMap<String, String>> for AdditionalHeaders { 29 | type Error = anyhow::Error; 30 | 31 | /// Build `AdditionalHeaders` from just a map of strings.
This can fail if the 32 | /// strings contain invalid characters for metadata keys / values; the chars must 33 | /// only be visible ascii characters. 34 | fn try_from(map: HashMap<String, String>) -> Result<Self> { 35 | let mut out = HashMap::new(); 36 | for (k, v) in map { 37 | let k = MetadataKey::from_str(&k) 38 | .with_context(|| format!("Failed to parse key as ascii metadata key: {k}"))?; 39 | let v = MetadataValue::from_str(&v) 40 | .with_context(|| format!("Failed to parse value as ascii metadata value: {v}"))?; 41 | out.insert(k, v); 42 | } 43 | Ok(AdditionalHeaders(out)) 44 | } 45 | } 46 | 47 | impl From<AdditionalHeaders> for HashMap<String, String> { 48 | fn from(headers: AdditionalHeaders) -> Self { 49 | headers 50 | .0 51 | .into_iter() 52 | // It is safe to unwrap here because when building this we asserted that the 53 | // MetadataValue only contained visible ascii characters. 54 | .map(|(k, v)| (k.as_str().to_owned(), v.to_str().unwrap().to_owned())) 55 | .collect() 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod additional_headers; 2 | pub mod time; 3 | -------------------------------------------------------------------------------- /aptos-indexer-processors-sdk/transaction-stream/src/utils/time.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | //! Helpers related to dealing with dates and times. 5 | 6 | use aptos_protos::util::timestamp::Timestamp; 7 | use chrono::Utc; 8 | 9 | /// 9999-12-31 23:59:59; this is the max supported by Google BigQuery. 10 | pub const MAX_TIMESTAMP_SECS: i64 = 253_402_300_799; 11 | 12 | pub fn parse_timestamp(ts: &Timestamp, version: i64) -> chrono::DateTime<Utc> { 13 | let final_ts = if ts.seconds >= MAX_TIMESTAMP_SECS || ts.seconds < 0 { 14 | Timestamp { 15 | seconds: MAX_TIMESTAMP_SECS, 16 | nanos: 0, 17 | } 18 | } else { 19 | *ts 20 | }; 21 | chrono::DateTime::from_timestamp(final_ts.seconds, final_ts.nanos as u32) 22 | .unwrap_or_else(|| panic!("Could not parse timestamp {ts:?} for version {version}")) 23 | } 24 | 25 | pub fn parse_timestamp_secs(ts: u64, version: i64) -> chrono::DateTime<Utc> { 26 | chrono::DateTime::from_timestamp(std::cmp::min(ts, MAX_TIMESTAMP_SECS as u64) as i64, 0) 27 | .unwrap_or_else(|| panic!("Could not parse timestamp {ts:?} for version {version}")) 28 | } 29 | 30 | pub fn compute_nanos_since_epoch(datetime: chrono::DateTime<Utc>) -> u64 { 31 | // The Unix epoch is 1970-01-01T00:00:00Z 32 | let unix_epoch = chrono::DateTime::<Utc>::from_timestamp(0, 0).unwrap(); 33 | let duration_since_epoch = datetime.signed_duration_since(unix_epoch); 34 | 35 | // Convert the duration to nanoseconds and return 36 | duration_since_epoch.num_seconds() as u64 * 1_000_000_000 37 | + duration_since_epoch.subsec_nanos() as u64 38 | } 39 | 40 | /// Convert the protobuf Timestamp to epoch time in seconds.
41 | pub fn time_diff_since_pb_timestamp_in_secs(timestamp: &Timestamp) -> f64 { 42 | let current_timestamp = std::time::SystemTime::now() 43 | .duration_since(std::time::UNIX_EPOCH) 44 | .expect("SystemTime before UNIX EPOCH!") 45 | .as_secs_f64(); 46 | let transaction_time = timestamp.seconds as f64 + timestamp.nanos as f64 * 1e-9; 47 | current_timestamp - transaction_time 48 | } 49 | 50 | /// Convert the protobuf timestamp to ISO format 51 | pub fn timestamp_to_iso(timestamp: &Timestamp) -> String { 52 | let dt = parse_timestamp(timestamp, 0); 53 | dt.format("%Y-%m-%dT%H:%M:%S%.9fZ").to_string() 54 | } 55 | 56 | /// Convert the protobuf timestamp to unixtime 57 | pub fn timestamp_to_unixtime(timestamp: &Timestamp) -> f64 { 58 | timestamp.seconds as f64 + timestamp.nanos as f64 * 1e-9 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | use chrono::Datelike; 65 | 66 | #[test] 67 | fn test_parse_timestamp() { 68 | let ts = parse_timestamp( 69 | &Timestamp { 70 | seconds: 1649560602, 71 | nanos: 0, 72 | }, 73 | 1, 74 | ) 75 | .naive_utc(); 76 | assert_eq!(ts.and_utc().timestamp(), 1649560602); 77 | assert_eq!(ts.year(), 2022); 78 | 79 | let too_high_ts = parse_timestamp( 80 | &Timestamp { 81 | seconds: u64::MAX as i64, // Convert a really big number to i64 82 | nanos: 0, 83 | }, 84 | 1, 85 | ); 86 | let max_ts = parse_timestamp( 87 | &Timestamp { 88 | seconds: MAX_TIMESTAMP_SECS, 89 | nanos: 0, 90 | }, 91 | 1, 92 | ); 93 | assert_eq!(too_high_ts, max_ts); 94 | 95 | let ts2 = parse_timestamp_secs(600000000000000, 2); 96 | assert_eq!(ts2.year(), 9999); 97 | 98 | let ts3 = parse_timestamp_secs(1659386386, 2); 99 | assert_eq!(ts3.timestamp(), 1659386386); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /examples/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [alias] 2 | xclippy = [ 3 | "clippy", 4 | "--workspace", 5 | "--all-targets", 6 | "--", 7 | "-Dwarnings", 8 | "-Wclippy::all", 9 | "-Aclippy::upper_case_acronyms", 10 | "-Aclippy::enum-variant-names", 11 | "-Aclippy::result-large-err", 12 | "-Aclippy::mutable-key-type", 13 | "-Aclippy::map_identity", # We temporarily ignore this due to: https://github.com/rust-lang/rust-clippy/issues/11764 14 | ] 15 | 16 | [build] 17 | rustflags = [ 18 | "--cfg", 19 | "tokio_unstable", 20 | "-C", 21 | "force-frame-pointers=yes", 22 | "-C", 23 | "force-unwind-tables=yes", 24 | ] 25 | 26 | # TODO(grao): Figure out whether we should enable other cpu features, and whether we should use a different way to configure them rather than list every single one here. 
27 | #[target.x86_64-unknown-linux-gnu] 28 | #rustflags = ["--cfg", "tokio_unstable", "-C", "link-arg=-fuse-ld=lld", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "target-feature=+sse4.2"] 29 | 30 | # 64 bit MSVC 31 | #[target.x86_64-pc-windows-msvc] 32 | #rustflags = ["--cfg", "tokio_unstable", "-C", "force-frame-pointers=yes", "-C", "force-unwind-tables=yes", "-C", "link-arg=/STACK:8000000" # Set stack to 8 MB] 33 | -------------------------------------------------------------------------------- /examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | resolver = "2" 3 | 4 | members = ["postgres-basic-events-example"] 5 | 6 | [workspace.package] 7 | authors = ["Aptos Labs "] 8 | edition = "2021" 9 | homepage = "https://aptoslabs.com" 10 | license = "Apache-2.0" 11 | publish = false 12 | repository = "https://github.com/aptos-labs/aptos-indexer-processor-sdk" 13 | rust-version = "1.78" 14 | 15 | [workspace.dependencies] 16 | postgres-basic-events-example = { path = "postgres-basic-events-example" } 17 | 18 | aptos-indexer-processor-sdk = { path = "../aptos-indexer-processors-sdk/sdk", features = [ 19 | "postgres_full", 20 | ] } 21 | anyhow = "1.0.86" 22 | async-trait = "0.1.80" 23 | clap = { version = "4.3.5", features = ["derive", "unstable-styles"] } 24 | diesel = { version = "=2.2.0", features = [ 25 | "chrono", 26 | "postgres_backend", 27 | "numeric", 28 | "serde_json", 29 | ] } 30 | diesel_migrations = { version = "2.1.0", features = ["postgres"] } 31 | field_count = "0.1.1" 32 | rayon = "1.10.0" 33 | sample = { path = "../aptos-indexer-processors-sdk/sample" } 34 | serde = { version = "1.0.193", features = ["derive", "rc"] } 35 | serde_json = { version = "1.0.81", features = ["preserve_order"] } 36 | tokio = { version = "1.37.0", features = ["full"] } 37 | tracing = "0.1.34" 38 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "postgres-basic-events-example" 3 | version = "1.0.0" 4 | 5 | # Workspace inherited keys 6 | authors = { workspace = true } 7 | edition = { workspace = true } 8 | homepage = { workspace = true } 9 | license = { workspace = true } 10 | publish = { workspace = true } 11 | repository = { workspace = true } 12 | rust-version = { workspace = true } 13 | 14 | [dependencies] 15 | anyhow = { workspace = true } 16 | aptos-indexer-processor-sdk = { workspace = true } 17 | async-trait = { workspace = true } 18 | clap = { workspace = true } 19 | diesel = { workspace = true } 20 | diesel_migrations = { workspace = true } 21 | field_count = { workspace = true } 22 | rayon = { workspace = true } 23 | serde = { workspace = true } 24 | serde_json = { workspace = true } 25 | tokio = { workspace = true } 26 | tracing = { workspace = true } 27 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/README.md: -------------------------------------------------------------------------------- 1 | # Example Postgres events processor 2 | 3 | ## About 4 | 5 | A basic processor that indexes events into Postgres. It uses the `process_function` utility function. 6 | 7 | ## How to use 8 | 1. Install Postgres and Diesel CLI 9 | 2. Construct a `config.yaml` file. You can see `postgres-basic-events-example/example-config.yaml` as an example. 10 | 3. 
cd ~/aptos-indexer-processors-sdk/example 11 | 4. cargo run -p postgres-basic-events-example -- -c postgres-basic-events-example/example-config.yaml 12 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/example-config.yaml: -------------------------------------------------------------------------------- 1 | # This is a template yaml for the processor 2 | health_check_port: 8085 3 | server_config: 4 | transaction_stream_config: 5 | indexer_grpc_data_service_address: "https://grpc.mainnet.aptoslabs.com:443" 6 | auth_token: "AUTH_TOKEN" 7 | request_name_header: "events-processor" 8 | starting_version: 0 9 | postgres_config: 10 | connection_string: postgresql://postgres:@localhost:5432/example 11 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/diesel.toml: -------------------------------------------------------------------------------- 1 | # For documentation on how to configure this file, 2 | # see https://diesel.rs/guides/configuring-diesel-cli 3 | 4 | [print_schema] 5 | file = "schema.rs" 6 | 7 | [migrations_directory] 8 | dir = "migrations" 9 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/00000000000000_diesel_initial_setup/down.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 4 | 5 | DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass); 6 | DROP FUNCTION IF EXISTS diesel_set_updated_at(); 7 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/00000000000000_diesel_initial_setup/up.sql: -------------------------------------------------------------------------------- 1 | -- This file was automatically created by Diesel to setup helper functions 2 | -- and other internal bookkeeping. This file is safe to edit, any future 3 | -- changes will be added to existing projects as new migrations. 
4 | 5 | 6 | 7 | 8 | -- Sets up a trigger for the given table to automatically set a column called 9 | -- `updated_at` whenever the row is modified (unless `updated_at` was included 10 | -- in the modified columns) 11 | -- 12 | -- # Example 13 | -- 14 | -- ```sql 15 | -- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW()); 16 | -- 17 | -- SELECT diesel_manage_updated_at('users'); 18 | -- ``` 19 | CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$ 20 | BEGIN 21 | EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s 22 | FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl); 23 | END; 24 | $$ LANGUAGE plpgsql; 25 | 26 | CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$ 27 | BEGIN 28 | IF ( 29 | NEW IS DISTINCT FROM OLD AND 30 | NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at 31 | ) THEN 32 | NEW.updated_at := current_timestamp; 33 | END IF; 34 | RETURN NEW; 35 | END; 36 | $$ LANGUAGE plpgsql; 37 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/2025-03-06-231718_create_events/down.sql: -------------------------------------------------------------------------------- 1 | -- This file should undo anything in `up.sql` 2 | DROP TABLE IF EXISTS events; 3 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/migrations/2025-03-06-231718_create_events/up.sql: -------------------------------------------------------------------------------- 1 | -- Your SQL goes here 2 | CREATE TABLE events ( 3 | sequence_number BIGINT NOT NULL, 4 | creation_number BIGINT NOT NULL, 5 | account_address VARCHAR(66) NOT NULL, 6 | transaction_version BIGINT NOT NULL, 7 | transaction_block_height BIGINT NOT NULL, 8 | type TEXT NOT NULL, 9 | data JSONB NOT NULL, 10 | inserted_at TIMESTAMP NOT NULL DEFAULT NOW(), 11 | event_index BIGINT NOT NULL, 12 | indexed_type VARCHAR(300) NOT NULL, 13 | PRIMARY KEY (transaction_version, event_index) 14 | ); -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/db/schema.rs: -------------------------------------------------------------------------------- 1 | // @generated automatically by Diesel CLI. 2 | 3 | diesel::table! 
{ 4 | events (transaction_version, event_index) { 5 | sequence_number -> Int8, 6 | creation_number -> Int8, 7 | #[max_length = 66] 8 | account_address -> Varchar, 9 | transaction_version -> Int8, 10 | transaction_block_height -> Int8, 11 | #[sql_name = "type"] 12 | type_ -> Text, 13 | data -> Jsonb, 14 | inserted_at -> Timestamp, 15 | event_index -> Int8, 16 | #[max_length = 300] 17 | indexed_type -> Varchar, 18 | } 19 | } 20 | 21 | diesel::allow_tables_to_appear_in_same_query!(events,); 22 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/events_model.rs: -------------------------------------------------------------------------------- 1 | // Copyright © Aptos Foundation 2 | // SPDX-License-Identifier: Apache-2.0 3 | 4 | #![allow(clippy::extra_unused_lifetimes)] 5 | 6 | use crate::schema::events; 7 | use aptos_indexer_processor_sdk::{ 8 | aptos_protos::transaction::v1::Event as EventPB, 9 | utils::convert::{standardize_address, truncate_str}, 10 | }; 11 | use diesel::{Identifiable, Insertable}; 12 | use field_count::FieldCount; 13 | use serde::{Deserialize, Serialize}; 14 | 15 | // p99 currently is 303 so using 300 as a safe max length 16 | const EVENT_TYPE_MAX_LENGTH: usize = 300; 17 | 18 | #[derive(Clone, Debug, Deserialize, FieldCount, Identifiable, Insertable, Serialize)] 19 | #[diesel(primary_key(transaction_version, event_index))] 20 | #[diesel(table_name = events)] 21 | pub struct Event { 22 | pub sequence_number: i64, 23 | pub creation_number: i64, 24 | pub account_address: String, 25 | pub transaction_version: i64, 26 | pub transaction_block_height: i64, 27 | pub type_: String, 28 | pub data: serde_json::Value, 29 | pub event_index: i64, 30 | pub indexed_type: String, 31 | } 32 | 33 | impl Event { 34 | pub fn from_event( 35 | event: &EventPB, 36 | transaction_version: i64, 37 | transaction_block_height: i64, 38 | event_index: i64, 39 | ) -> Self { 40 | let t: &str = event.type_str.as_ref(); 41 | Event { 42 | account_address: standardize_address( 43 | event.key.as_ref().unwrap().account_address.as_str(), 44 | ), 45 | creation_number: event.key.as_ref().unwrap().creation_number as i64, 46 | sequence_number: event.sequence_number as i64, 47 | transaction_version, 48 | transaction_block_height, 49 | type_: t.to_string(), 50 | data: serde_json::from_str(event.data.as_str()).unwrap(), 51 | event_index, 52 | indexed_type: truncate_str(t, EVENT_TYPE_MAX_LENGTH), 53 | } 54 | } 55 | 56 | pub fn from_events( 57 | events: &[EventPB], 58 | transaction_version: i64, 59 | transaction_block_height: i64, 60 | ) -> Vec { 61 | events 62 | .iter() 63 | .enumerate() 64 | .map(|(index, event)| { 65 | Self::from_event( 66 | event, 67 | transaction_version, 68 | transaction_block_height, 69 | index as i64, 70 | ) 71 | }) 72 | .collect::>() 73 | } 74 | } 75 | 76 | // Prevent conflicts with other things named `Event` 77 | pub type EventModel = Event; 78 | -------------------------------------------------------------------------------- /examples/postgres-basic-events-example/src/main.rs: -------------------------------------------------------------------------------- 1 | use crate::events_model::EventModel; 2 | use anyhow::Result; 3 | use aptos_indexer_processor_sdk::{ 4 | aptos_protos::transaction::v1::transaction::TxnData, 5 | postgres::{ 6 | basic_processor::process, 7 | utils::database::{execute_in_chunks, MAX_DIESEL_PARAM_SIZE}, 8 | }, 9 | }; 10 | use diesel::{pg::Pg, query_builder::QueryFragment}; 11 | use 
diesel_migrations::{embed_migrations, EmbeddedMigrations}; 12 | use field_count::FieldCount; 13 | use rayon::prelude::*; 14 | use tracing::{error, info, warn}; 15 | 16 | pub mod events_model; 17 | #[path = "db/schema.rs"] 18 | pub mod schema; 19 | 20 | const MIGRATIONS: EmbeddedMigrations = embed_migrations!("src/db/migrations"); 21 | 22 | fn insert_events_query( 23 | items_to_insert: Vec, 24 | ) -> impl QueryFragment + diesel::query_builder::QueryId + Send { 25 | use crate::schema::events::dsl::*; 26 | diesel::insert_into(crate::schema::events::table) 27 | .values(items_to_insert) 28 | .on_conflict((transaction_version, event_index)) 29 | .do_nothing() 30 | } 31 | 32 | #[tokio::main] 33 | async fn main() -> Result<()> { 34 | process( 35 | "events_processor".to_string(), 36 | MIGRATIONS, 37 | async |transactions, conn_pool| { 38 | let events = transactions 39 | .par_iter() 40 | .map(|txn| { 41 | let txn_version = txn.version as i64; 42 | let block_height = txn.block_height as i64; 43 | let txn_data = match txn.txn_data.as_ref() { 44 | Some(data) => data, 45 | None => { 46 | warn!( 47 | transaction_version = txn_version, 48 | "Transaction data doesn't exist" 49 | ); 50 | return vec![]; 51 | }, 52 | }; 53 | let default = vec![]; 54 | let raw_events = match txn_data { 55 | TxnData::BlockMetadata(tx_inner) => &tx_inner.events, 56 | TxnData::Genesis(tx_inner) => &tx_inner.events, 57 | TxnData::User(tx_inner) => &tx_inner.events, 58 | _ => &default, 59 | }; 60 | 61 | EventModel::from_events(raw_events, txn_version, block_height) 62 | }) 63 | .flatten() 64 | .collect::>(); 65 | 66 | // Store events in the database 67 | let execute_res = execute_in_chunks( 68 | conn_pool.clone(), 69 | insert_events_query, 70 | &events, 71 | MAX_DIESEL_PARAM_SIZE / EventModel::field_count(), 72 | ) 73 | .await; 74 | match execute_res { 75 | Ok(_) => { 76 | info!( 77 | "Events version [{}, {}] stored successfully", 78 | transactions.first().unwrap().version, 79 | transactions.last().unwrap().version 80 | ); 81 | Ok(()) 82 | }, 83 | Err(e) => { 84 | error!("Failed to store events: {:?}", e); 85 | Err(e) 86 | }, 87 | } 88 | }, 89 | ) 90 | .await?; 91 | Ok(()) 92 | } 93 | -------------------------------------------------------------------------------- /examples/rustfmt.toml: -------------------------------------------------------------------------------- 1 | combine_control_expr = false 2 | edition = "2021" 3 | imports_granularity = "Crate" 4 | format_macro_matchers = true 5 | group_imports = "One" 6 | hex_literal_case = "Upper" 7 | match_block_trailing_comma = true 8 | newline_style = "Unix" 9 | overflow_delimited_expr = true 10 | reorder_impl_items = true 11 | use_field_init_shorthand = true 12 | -------------------------------------------------------------------------------- /scripts/check_banned_deps.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script checks if the crates in both examples and aptos-indexer-processors-sdk 4 | # depend on external deps that they shouldn't. We run this in CI to make sure we don't 5 | # accidentally reintroduce deps that would make the crates unusable for the CLI. 6 | # 7 | # While it would be more reliable to actually build the crate and check what libraries 8 | # it links to, e.g. with otool, it is much cheaper to use cargo tree. As far as I can 9 | # tell the entire Rust ecosystem makes use of these `x-sys` libraries to depend on 10 | # external dynamically linked libraries. 
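# (For a rough manual spot-check of the same idea, a sketch rather than part of the CI flow:
#  running `cargo tree --features postgres_partial -e features,no-build,no-dev -i pq-sys`
#  inside aptos-indexer-processors-sdk exits non-zero when the crate is absent from the graph.)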
11 | # 12 | # Run this from the root directory of the project. 13 | 14 | # Make sure we're in the root directory 15 | if [ ! -d "examples" ] || [ ! -d "aptos-indexer-processors-sdk" ]; then 16 | echo "Please run this script from the root directory of the project" 17 | exit 1 18 | fi 19 | 20 | # We only run the check on the SDK since that's the only crate used by the CLI. 21 | cd "aptos-indexer-processors-sdk" 22 | 23 | declare -a deps=("pq-sys" "openssl-sys") 24 | 25 | for dep in "${deps[@]}"; do 26 | echo "Checking for banned dependency $dep..." 27 | 28 | # Check for deps. As you can see, we only check for MacOS right now. 29 | # We specify --features postgres_partial because we only care about these banned deps 30 | # for the local testnet use case, in which case it uses only a subset of the 31 | # postgres features that don't include pq-sys. 32 | out=`cargo tree --features postgres_partial -e features,no-build,no-dev --target aarch64-apple-darwin -i "$dep"` 33 | 34 | # If the exit status was non-zero, great, the dep couldn't be found. 35 | if [ $? -ne 0 ]; then 36 | continue 37 | fi 38 | 39 | # If the exit status was zero we have to check the output to see if the dep is in 40 | # use. If it is in the output, it is in use. 41 | if [[ $out != *"$dep"* ]]; then 42 | continue 43 | fi 44 | 45 | echo "Banned dependency $dep found in $dir!" 46 | cd ../.. 47 | exit 1 48 | done 49 | 50 | echo "None of the banned dependencies are in use in $dir, great!" 51 | 52 | exit 0 -------------------------------------------------------------------------------- /scripts/rust_lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # This script runs linting for both the examples and aptos-indexer-processors-sdk directories 4 | # It assumes you have cargo-sort installed. If not, you can install it with: 5 | # cargo install cargo-sort 6 | 7 | set -e 8 | 9 | # Function to run linting in a directory 10 | run_lint() { 11 | local dir=$1 12 | echo "Running lint in $dir..." 13 | 14 | # Change to the directory 15 | cd "$dir" 16 | 17 | # Run in check mode if requested 18 | CHECK_ARG="" 19 | if [ "$1" = "--check" ]; then 20 | CHECK_ARG="--check" 21 | fi 22 | 23 | # Run the linting commands 24 | cargo +nightly xclippy 25 | cargo +nightly fmt $CHECK_ARG 26 | cargo sort --grouped --workspace $CHECK_ARG 27 | 28 | # Return to the original directory 29 | cd .. 30 | } 31 | 32 | # Make sure we're in the root directory 33 | if [ ! -d "examples" ] || [ ! -d "aptos-indexer-processors-sdk" ]; then 34 | echo "Please run this script from the root directory of the project" 35 | exit 1 36 | fi 37 | 38 | # Run linting for both directories 39 | echo "Starting linting process..." 40 | 41 | echo "\nLinting examples directory..." 42 | run_lint "examples" 43 | 44 | echo "\nLinting aptos-indexer-processors-sdk directory..." 45 | run_lint "aptos-indexer-processors-sdk" 46 | 47 | echo "\nLinting completed successfully!" --------------------------------------------------------------------------------
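Usage sketch for the two helper scripts above (an assumption-laden example, not part of the repository files: it presumes a Unix shell at the repository root with the nightly toolchain and cargo-sort installed, which the scripts themselves expect):

    bash scripts/rust_lint.sh
    bash scripts/check_banned_deps.sh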