├── evm-decode
├── .gitignore
└── Cargo.toml
├── ingest
├── src
│   ├── provider
│   │   └── mod.rs
│   ├── rayon_async.rs
│   ├── lib.rs
│   ├── evm.rs
│   └── svm.rs
└── Cargo.toml
├── python
├── examples
│   ├── base58.py
│   ├── hypersync_continuous.py
│   ├── sqd_continuous.py
│   ├── v2_pool_tokens.py
│   ├── token_metadata.py
│   ├── v_r_s_schema.py
│   ├── cherry_ingest_erc20.py
│   ├── solana.py
│   ├── cryo_erc20.py
│   ├── yellowstone_grpc.py
│   ├── svm_decode_log.py
│   ├── cryo_erc20_datafusion.py
│   └── solana_all.py
├── pyproject.toml
├── README.md
├── .gitignore
├── Cargo.toml
├── LICENSE-MIT
├── cherry_core
│   ├── svm_decode
│   │   └── __init__.py
│   ├── ingest
│   │   ├── __init__.py
│   │   ├── evm.py
│   │   └── svm.py
│   └── __init__.py
├── src
│   ├── ingest.rs
│   └── lib.rs
└── LICENSE-APACHE
├── core
├── src
│   ├── lib.rs
│   └── tests.rs
└── Cargo.toml
├── evm-schema
└── Cargo.toml
├── svm-schema
├── Cargo.toml
└── src
│   └── lib.rs
├── query
└── Cargo.toml
├── .gitignore
├── README.md
├── cast
├── Cargo.toml
└── src
│   └── lib.rs
├── svm-decode
├── Cargo.toml
└── src
│   └── deserialize.rs
├── Cargo.toml
├── LICENSE-MIT
├── .github
└── workflows
│   ├── ci.yaml
│   └── publish-python.yaml
└── LICENSE-APACHE

/evm-decode/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | 
--------------------------------------------------------------------------------
/ingest/src/provider/mod.rs:
--------------------------------------------------------------------------------
1 | pub(crate) mod common;
2 | pub mod hypersync;
3 | pub mod sqd;
--------------------------------------------------------------------------------
/python/examples/base58.py:
--------------------------------------------------------------------------------
1 | from cherry_core import base58_encode_bytes, base58_decode_string
2 | 
3 | b = b"asdlmsa;dm1123213213:SDMA"
4 | 
5 | s = base58_encode_bytes(b)
6 | 
7 | print(s)
8 | 
9 | b2 = base58_decode_string(s)
10 | 
11 | print(b)
12 | print(b2)
13 | 
14 | print(b == b2)
15 | 
--------------------------------------------------------------------------------
/core/src/lib.rs:
--------------------------------------------------------------------------------
1 | pub use cherry_cast as cast;
2 | pub use cherry_evm_decode as evm_decode;
3 | pub use cherry_evm_schema as evm_schema;
4 | pub use cherry_ingest as ingest;
5 | pub use cherry_query as query;
6 | pub use cherry_svm_decode as svm_decode;
7 | pub use cherry_svm_schema as svm_schema;
8 | #[cfg(test)]
9 | mod tests;
--------------------------------------------------------------------------------
/ingest/src/rayon_async.rs:
--------------------------------------------------------------------------------
1 | use tokio::sync::oneshot;
2 | 
3 | pub fn spawn<F, T>(func: F) -> oneshot::Receiver<T>
4 | where
5 |     F: 'static + FnOnce() -> T + Send,
6 |     T: 'static + Send + Sync,
7 | {
8 |     let (tx, rx) = oneshot::channel();
9 | 
10 |     rayon::spawn(move || {
11 |         let res = func();
12 |         tx.send(res).ok();
13 |     });
14 | 
15 |     rx
16 | }
17 | 
--------------------------------------------------------------------------------
/evm-schema/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-evm-schema"
3 | version = "0.2.0"
4 | edition = "2021"
5 | description = "EVM schema definitions for cherry"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [dependencies]
12 | arrow = { workspace = true }
--------------------------------------------------------------------------------
/svm-schema/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-svm-schema"
3 | version = "0.2.0"
4 | edition = "2021"
5 | description = "SVM schema definitions for cherry"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [dependencies]
12 | arrow = { workspace = true }
--------------------------------------------------------------------------------
/python/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["maturin>=1.8,<2.0"]
3 | build-backend = "maturin"
4 | 
5 | [project]
6 | name = "cherry-core"
7 | requires-python = ">=3.8"
8 | classifiers = [
9 |     "Programming Language :: Rust",
10 |     "Programming Language :: Python :: Implementation :: CPython",
11 |     "Programming Language :: Python :: Implementation :: PyPy",
12 | ]
13 | dynamic = ["version"]
14 | 
15 | [tool.maturin]
16 | features = ["pyo3/extension-module"]
--------------------------------------------------------------------------------
/query/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-query"
3 | version = "0.3.0"
4 | edition = "2021"
5 | description = "Query executor for cherry"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [dependencies]
12 | arrow = { workspace = true }
13 | anyhow = { workspace = true }
14 | rayon = { workspace = true }
15 | hashbrown = { workspace = true }
16 | xxhash-rust = { workspace = true }
--------------------------------------------------------------------------------
/python/README.md:
--------------------------------------------------------------------------------
1 | # cherry-core python bindings
2 | 
3 | ## License
4 | 
5 | Licensed under either of
6 | 
7 | * Apache License, Version 2.0
8 |   ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
9 | * MIT license
10 |   ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
11 | 
12 | at your option.
13 | 
14 | ## Contribution
15 | 
16 | Unless you explicitly state otherwise, any contribution intentionally submitted
17 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
18 | dual licensed as above, without any additional terms or conditions.
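## Example

A minimal streaming sketch (it mirrors `examples/sqd_continuous.py` in this repo; the dataset URL and starting block are illustrative):

```python
from cherry_core import ingest
import asyncio


async def main():
    # stream block numbers from an SQD portal dataset
    stream = ingest.start_stream(
        ingest.ProviderConfig(
            kind=ingest.ProviderKind.SQD,
            url="https://portal.sqd.dev/datasets/ethereum-mainnet",
        ),
        ingest.Query(
            kind=ingest.QueryKind.EVM,
            params=ingest.evm.Query(
                from_block=21930160,
                include_all_blocks=True,
                fields=ingest.evm.Fields(
                    block=ingest.evm.BlockFields(number=True),
                ),
            ),
        ),
    )

    while True:
        res = await stream.next()
        if res is None:  # stream ended
            break
        print(res["blocks"].column("number"))


asyncio.run(main())
```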
19 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated by Cargo
2 | # will have compiled files and executables
3 | **/debug
4 | **/target
5 | 
6 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
7 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
8 | **/Cargo.lock
9 | 
10 | # These are backup files generated by rustfmt
11 | **/*.rs.bk
12 | 
13 | # MSVC Windows builds of rustc generate these, which store debugging information
14 | *.pdb
15 | 
16 | .vscode/
17 | 
18 | # data folder for test output
19 | data
20 | core/reports
21 | 
22 | # jupyter notebooks
23 | *.ipynb
24 | 
25 | # uv.lock
26 | uv.lock
27 | 
28 | .env
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # cherry-core
2 | 
3 | Core libraries for the `cherry` blockchain data pipeline framework.
4 | 
5 | ## License
6 | 
7 | Licensed under either of
8 | 
9 | * Apache License, Version 2.0
10 |   ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0)
11 | * MIT license
12 |   ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT)
13 | 
14 | at your option.
15 | 
16 | ## Contribution
17 | 
18 | Unless you explicitly state otherwise, any contribution intentionally submitted
19 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
20 | dual licensed as above, without any additional terms or conditions.
21 | 
--------------------------------------------------------------------------------
/cast/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-cast"
3 | version = "0.3.0"
4 | edition = "2021"
5 | description = "Library for casting arrow columns with support for specific blockchain types"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [dependencies]
12 | arrow = { workspace = true }
13 | ruint = { workspace = true }
14 | anyhow = { workspace = true }
15 | faster-hex = { workspace = true }
16 | bs58 = { workspace = true }
17 | alloy-primitives = { workspace = true }
18 | 
19 | [dev-dependencies]
20 | parquet = { workspace = true }
--------------------------------------------------------------------------------
/evm-decode/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-evm-decode"
3 | version = "0.2.0"
4 | edition = "2021"
5 | description = "EVM decoding implementations in Arrow format"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [dependencies]
12 | arrow = { workspace = true }
13 | alloy-dyn-abi = { workspace = true }
14 | alloy-json-abi = { workspace = true }
15 | alloy-primitives = { workspace = true }
16 | ruint = { workspace = true }
17 | anyhow = { workspace = true }
18 | log = { workspace = true }
19 | 
20 | [dev-dependencies]
21 | parquet = { workspace = true }
22 | 
--------------------------------------------------------------------------------
/svm-decode/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-svm-decode"
3 | version = "0.4.0"
4 | edition = "2021"
5 | description = "SVM decoding implementations in Arrow format"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt , Yule "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [dependencies]
12 | arrow = { workspace = true }
13 | anyhow = { workspace = true }
14 | log = { workspace = true }
15 | pyo3 = { workspace = true, features = ["anyhow"], optional = true }
16 | bs58 = { workspace = true }
17 | base64 = { workspace = true }
18 | 
19 | [features]
20 | pyo3 = ["dep:pyo3"]
21 | 
22 | [dev-dependencies]
23 | parquet = { workspace = true }
24 | 
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [workspace]
2 | resolver = "2"
3 | members = [
4 |     "evm-decode",
5 |     "cast",
6 |     "core",
7 |     "evm-schema",
8 |     "python",
9 |     "ingest",
10 |     "svm-schema",
11 |     "query",
12 |     "svm-decode",
13 | ]
14 | 
15 | [workspace.dependencies]
16 | arrow = "56"
17 | parquet = "56"
18 | alloy-dyn-abi = "1"
19 | alloy-json-abi = "1"
20 | alloy-primitives = "1"
21 | alloy-consensus = "1"
22 | alloy-eips = "1"
23 | alloy-multicall = "1"
24 | alloy-sol-types = { version = "1", features = ["json"] }
25 | alloy-provider = "1"
26 | ruint = "1"
27 | anyhow = "1"
28 | log = "0.4"
29 | faster-hex = "0.10"
30 | futures-lite = "2"
31 | tokio = { version = "1", default-features = false }
32 | tokio-stream = "0.1"
33 | pyo3 = "0.25"
34 | sqd-portal-client = "0.2"
35 | hypersync-client = "0.19"
36 | serde = "1"
37 | serde_json = "1"
38 | polars-arrow = "0.42"
39 | bincode = "1"
40 | chrono = "0.4"
41 | bs58 = "0.5"
42 | base64 = "0.22.1"
43 | rayon = "1"
44 | hashbrown = "0.16"
45 | xxhash-rust = { version = "0.8", features = ["xxh3"] }
--------------------------------------------------------------------------------
/python/.gitignore:
--------------------------------------------------------------------------------
1 | /target
2 | 
3 | # Byte-compiled / optimized / DLL files
4 | __pycache__/
5 | .pytest_cache/
6 | *.py[cod]
7 | 
8 | # C extensions
9 | *.so
10 | 
11 | # Distribution / packaging
12 | .Python
13 | .venv/
14 | env/
15 | bin/
16 | build/
17 | develop-eggs/
18 | dist/
19 | eggs/
20 | lib/
21 | lib64/
22 | parts/
23 | sdist/
24 | var/
25 | include/
26 | man/
27 | venv/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | pip-selfcheck.json
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | 
45 | # Translations
46 | *.mo
47 | 
48 | # Mr Developer
49 | .mr.developer.cfg
50 | .project
51 | .pydevproject
52 | 
53 | # Rope
54 | .ropeproject
55 | 
56 | # Django stuff:
57 | *.log
58 | *.pot
59 | 
60 | .DS_Store
61 | 
62 | # Sphinx documentation
63 | docs/_build/
64 | 
65 | # PyCharm
66 | .idea/
67 | 
68 | # VSCode
69 | .vscode/
70 | 
71 | # Pyenv
72 | .python-version
73 | 
74 | .ruff_cache
--------------------------------------------------------------------------------
/python/examples/hypersync_continuous.py:
--------------------------------------------------------------------------------
1 | from cherry_core import ingest
2 | import asyncio
3 | 
4 | 
5 | async def run(provider: ingest.ProviderConfig, query: ingest.Query):
6 |     stream = ingest.start_stream(provider, query)
7 | 
8 |     while True:
9 |         res = await stream.next()
10 |         if res is None:
11 |             break
12 | 
13 |         print(res["blocks"].column("number"))
14 | 
15 | 
16 | query = ingest.Query(
17 |     kind=ingest.QueryKind.EVM,
18 |     params=ingest.evm.Query(
19 |         from_block=21930160,
20 |         include_all_blocks=True,
21 |         fields=ingest.evm.Fields(
22 |             block=ingest.evm.BlockFields(
23 |                 number=True,
24 |             ),
25 |         ),
26 |     ),
27 | )
28 | 
29 | asyncio.run(
30 |     run(
31 |         ingest.ProviderConfig(
32 |             kind=ingest.ProviderKind.HYPERSYNC,
33 |             stop_on_head=False,  # default is False as well
34 |             head_poll_interval_millis=1000,  # default is 1000
35 |         ),
36 |         query=query,
37 |     )
38 | )
--------------------------------------------------------------------------------
/python/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-core-python"
3 | version = "0.7.0"
4 | edition = "2021"
5 | description = "Core library for cherry blockchain data framework"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [lib]
12 | name = "cherry_core"
13 | crate-type = ["cdylib"]
14 | 
15 | [dependencies]
16 | pyo3 = { workspace = true, features = ["anyhow", "experimental-async"] }
17 | baselib = { package = "cherry-core", path = "../core", features = ["pyo3"] }
18 | arrow = { workspace = true, features = ["pyarrow"] }
19 | anyhow = { workspace = true }
20 | faster-hex = { workspace = true }
21 | futures-lite = { workspace = true }
22 | sqd-portal-client = { workspace = true }
23 | tokio = { workspace = true, features = ["full"] }
24 | env_logger = "0.11"
25 | bs58 = { workspace = true }
26 | tikv-jemallocator = { version = "0.6", features = ["disable_initial_exec_tls", "unprefixed_malloc_on_supported_platforms"] }
--------------------------------------------------------------------------------
/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | Permission is hereby granted, free of charge, to any
2 | person obtaining a copy of this software and associated
3 | documentation files (the "Software"), to deal in the
4 | Software without restriction, including without
5 | limitation the rights to use, copy, modify, merge,
6 | publish, distribute, sublicense, and/or sell copies of
7 | the Software, and to permit persons to whom the Software
8 | is furnished to do so, subject to the following
9 | conditions:
10 | 
11 | The above copyright notice and this permission notice
12 | shall be included in all copies or substantial portions
13 | of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 
--------------------------------------------------------------------------------
/python/examples/sqd_continuous.py:
--------------------------------------------------------------------------------
1 | from cherry_core import ingest
2 | import asyncio
3 | 
4 | 
5 | async def run(provider: ingest.ProviderConfig, query: ingest.Query):
6 |     stream = ingest.start_stream(provider, query)
7 | 
8 |     while True:
9 |         res = await stream.next()
10 |         if res is None:
11 |             break
12 | 
13 |         print(res["blocks"].column("number"))
14 | 
15 | 
16 | query = ingest.Query(
17 |     kind=ingest.QueryKind.EVM,
18 |     params=ingest.evm.Query(
19 |         from_block=21930160,
20 |         include_all_blocks=True,
21 |         fields=ingest.evm.Fields(
22 |             block=ingest.evm.BlockFields(
23 |                 number=True,
24 |             ),
25 |         ),
26 |     ),
27 | )
28 | 
29 | asyncio.run(
30 |     run(
31 |         ingest.ProviderConfig(
32 |             kind=ingest.ProviderKind.SQD,
33 |             url="https://portal.sqd.dev/datasets/ethereum-mainnet",
34 |             stop_on_head=False,  # default is False as well
35 |             head_poll_interval_millis=1000,  # default is 1000
36 |         ),
37 |         query=query,
38 |     )
39 | )
--------------------------------------------------------------------------------
/python/LICENSE-MIT:
--------------------------------------------------------------------------------
1 | Permission is hereby granted, free of charge, to any
2 | person obtaining a copy of this software and associated
3 | documentation files (the "Software"), to deal in the
4 | Software without restriction, including without
5 | limitation the rights to use, copy, modify, merge,
6 | publish, distribute, sublicense, and/or sell copies of
7 | the Software, and to permit persons to whom the Software
8 | is furnished to do so, subject to the following
9 | conditions:
10 | 
11 | The above copyright notice and this permission notice
12 | shall be included in all copies or substantial portions
13 | of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 | DEALINGS IN THE SOFTWARE.
24 | 
--------------------------------------------------------------------------------
/core/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-core"
3 | version = "0.7.0"
4 | edition = "2021"
5 | description = "Core library for cherry blockchain data framework"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [dependencies]
12 | cherry-evm-decode = { path = "../evm-decode", version = "0.2.0" }
13 | cherry-cast = { path = "../cast", version = "0.3.0" }
14 | cherry-svm-decode = { path = "../svm-decode", version = "0.4.0" }
15 | cherry-evm-schema = { path = "../evm-schema", version = "0.2.0" }
16 | cherry-svm-schema = { path = "../svm-schema", version = "0.2.0" }
17 | cherry-ingest = { path = "../ingest", version = "0.5.0" }
18 | cherry-query = { path = "../query", version = "0.3.0" }
19 | 
20 | [dev-dependencies]
21 | hypersync-client = { workspace = true }
22 | tokio = { version = "1", features = ["full"] }
23 | polars-arrow = { workspace = true }
24 | arrow = { workspace = true, features = ["ffi"] }
25 | serde_json = "1"
26 | futures-lite = "2"
27 | faster-hex = { workspace = true }
28 | 
29 | [features]
30 | pyo3 = ["cherry-ingest/pyo3", "cherry-svm-decode/pyo3"]
--------------------------------------------------------------------------------
/python/examples/v2_pool_tokens.py:
--------------------------------------------------------------------------------
1 | import cherry_core
2 | import polars as pl
3 | 
4 | 
5 | def main():
6 |     # Test addresses including invalid and valid ones
7 |     pool_addresses = [
8 |         "Invalid address",
9 |         "0xfBB6Eed8e7aa03B138556eeDaF5D271A5E1e43ef",  # cbBTC/USDC on uniswap v3
10 |         "0x31f609019d0CC0b8cC865656142d6FeD69853689",  # POPCAT/WETH on uniswap v2
11 |         "0x6cDcb1C4A4D1C3C6d054b27AC5B77e89eAFb971d",  # AERO/USDC on Aerodrome
12 |         "0x323b43332F97B1852D8567a08B1E8ed67d25A8d5",  # msETH/WETH on Pancake Swap
13 |     ]
14 | 
15 |     # Test get_pools_token0_token1
16 |     print("Testing get_pools_token0_token1:")
17 |     pool_tokens = cherry_core.get_pools_token0_token1(
18 |         "https://base-rpc.publicnode.com", pool_addresses
19 |     )
20 |     print("Pool tokens as list of dictionaries:")
21 |     for pool in pool_tokens:
22 |         print(pool)
23 |     print("\n")
24 | 
25 |     # Test get_pools_token0_token1_as_table
26 |     print("Testing get_pools_token0_token1_as_table:")
27 |     pool_tokens_table = cherry_core.get_pools_token0_token1_as_table(
28 |         "https://base-rpc.publicnode.com", pool_addresses
29 |     )
30 |     # Convert to polars DataFrame for better display
31 |     df = pl.from_arrow(pool_tokens_table)
32 |     print("Pool tokens as table:")
33 |     print(df)
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     main()
--------------------------------------------------------------------------------
/python/examples/token_metadata.py:
--------------------------------------------------------------------------------
1 | import cherry_core
2 | import polars as pl
3 | 
4 | 
5 | def main():
6 |     # Test addresses including invalid and valid ones
7 |     addresses = [
8 |         "Invalid address",
9 |         "0x0000000000000000000000000000000000000000",  # Zero address
10 |         "0x6B175474E89094C44Da98b954EedeAC495271d0F",  # DAI
11 |         "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48",  # USDC
12 |         "0xae7ab96520DE3A18E5e111B5EaAb095312D7fE84",  # stETH
13 |     ]
14 | 
15 |     # Test get_token_metadata
16 |     print("Testing get_token_metadata:")
17 |     token_metadata = cherry_core.get_token_metadata(
18 |         "https://ethereum-rpc.publicnode.com", addresses
19 |     )
20 |     print("Token metadata as list of dictionaries:")
21 |     for metadata in token_metadata:
22 |         print(metadata)
23 |     print("\n")
24 | 
25 |     # Test get_token_metadata_as_table
26 |     print("Testing get_token_metadata_as_table:")
27 |     token_metadata_table = cherry_core.get_token_metadata_as_table(
28 |         "https://ethereum-rpc.publicnode.com",
29 |         addresses,
30 |         {
31 |             "decimals": True,
32 |             "symbol": False,
33 |             "name": True,
34 |             "total_supply": True,
35 |         },
36 |     )
37 |     # Convert to polars DataFrame for better display
38 |     df = pl.from_arrow(token_metadata_table)
39 |     print("Token metadata as table:")
40 |     print(df)
41 | 
42 | 
43 | if __name__ == "__main__":
44 |     main()
--------------------------------------------------------------------------------
/.github/workflows/ci.yaml:
--------------------------------------------------------------------------------
1 | name: CI
2 | 
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |   pull_request:
8 |     branches:
9 |       - main
10 | 
11 | concurrency:
12 |   group: ${{ github.workflow }}-${{ github.ref }}
13 |   cancel-in-progress: true
14 | 
15 | env:
16 |   CARGO_TERM_COLOR: always
17 | 
18 | jobs:
19 |   test_dev:
20 |     runs-on: ubuntu-latest
21 |     steps:
22 |       - uses: actions/checkout@v3
23 |       - uses: Swatinem/rust-cache@v2
24 |       - name: Install Dependencies
25 |         run: |
26 |           export DEBIAN_FRONTEND=noninteractive
27 |           sudo apt-get install -y capnproto libcapnp-dev
28 |       - name: Build
29 |         run: cargo build
30 |       - name: Test
31 |         run: cargo test
32 | 
33 |   test_release:
34 |     runs-on: ubuntu-latest
35 |     steps:
36 |       - uses: actions/checkout@v3
37 |       - uses: Swatinem/rust-cache@v2
38 |       - name: Install Dependencies
39 |         run: |
40 |           export DEBIAN_FRONTEND=noninteractive
41 |           sudo apt-get install -y capnproto libcapnp-dev
42 |       - name: Build
43 |         run: cargo build --release
44 |       - name: Test
45 |         run: cargo test --release
46 | 
47 |   lint:
48 |     runs-on: ubuntu-latest
49 |     steps:
50 |       - uses: actions/checkout@v3
51 |       - uses: Swatinem/rust-cache@v2
52 |       - name: Install Dependencies
53 |         run: |
54 |           export DEBIAN_FRONTEND=noninteractive
55 |           sudo apt-get install -y capnproto libcapnp-dev
56 |       - name: Rustfmt
57 |         run: cargo fmt --check
58 |       - name: Clippy
59 |         run: cargo clippy -- -Dwarnings
60 | 
--------------------------------------------------------------------------------
/ingest/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "cherry-ingest"
3 | version = "0.5.0"
4 | edition = "2021"
5 | description = "Library for ingesting evm data using a common query/response format"
6 | homepage = "https://github.com/steelcake/cherry-core"
7 | repository = "https://github.com/steelcake/cherry-core"
8 | authors = ["Ozgur Akkurt "]
9 | license = "MIT OR Apache-2.0"
10 | 
11 | [dependencies]
12 | sqd-portal-client = { workspace = true }
13 | futures-lite = { workspace = true }
14 | anyhow = { workspace = true }
15 | arrow = { workspace = true, features = ["ffi"] }
16 | polars-arrow = { workspace = true }
17 | faster-hex = { workspace = true }
18 | tokio = { workspace = true, features = ["sync"] }
19 | log = { workspace = true }
20 | tokio-stream = { workspace = true }
21 | pyo3 = { workspace = true, features = ["anyhow"], optional = true }
22 | hypersync-client = { workspace = true }
23 | serde = { workspace = true, features = ["derive"] }
24 | serde_json = { workspace = true }
25 | bincode = { workspace = true }
26 | alloy-eips = { workspace = true }
27 | alloy-primitives = { workspace = true, features = ["serde"] }
28 | bs58 = { workspace = true }
29 | rayon = { workspace = true }
30 | 
31 | cherry-evm-schema = { path = "../evm-schema", version = "0.2.0" }
32 | cherry-cast = { path = "../cast", version = "0.3.0" }
33 | cherry-query = { path = "../query", version = "0.3.0" }
34 | cherry-svm-schema = { path = "../svm-schema", version = "0.2.0" }
35 | 
36 | [dev-dependencies]
37 | parquet = { workspace = true }
38 | bs58 = { workspace = true }
39 | 
40 | [features]
41 | pyo3 = ["dep:pyo3"]
--------------------------------------------------------------------------------
/python/examples/v_r_s_schema.py:
--------------------------------------------------------------------------------
1 | from cherry_core import ingest
2 | import asyncio
3 | import argparse
4 | 
5 | 
6 | async def run(provider: ingest.ProviderConfig, query: ingest.Query):
7 |     stream = ingest.start_stream(provider, query)
8 | 
9 |     while True:
10 |         res = await stream.next()
11 |         if res is None:
12 |             break
13 | 
14 |         print(res["transactions"].schema)
15 | 
16 | 
17 | async def main(provider_kind: ingest.ProviderKind):
18 |     query = ingest.Query(
19 |         kind=ingest.QueryKind.EVM,
20 |         params=ingest.evm.Query(
21 |             from_block=0,
22 |             transactions=[ingest.evm.TransactionRequest()],
23 |             fields=ingest.evm.Fields(
24 |                 transaction=ingest.evm.TransactionFields(
25 |                     v=True,
26 |                     r=True,
27 |                     s=True,
28 |                 ),
29 |             ),
30 |         ),
31 |     )
32 | 
33 |     url = None
34 | 
35 |     if provider_kind == ingest.ProviderKind.SQD:
36 |         url = "https://portal.sqd.dev/datasets/ethereum-mainnet"
37 |     elif provider_kind == ingest.ProviderKind.HYPERSYNC:
38 |         url = "https://eth.hypersync.xyz"
39 | 
40 |     await run(
41 |         ingest.ProviderConfig(
42 |             kind=provider_kind,
43 |             url=url,
44 |             stop_on_head=False,  # default is False as well
45 |             head_poll_interval_millis=1000,  # default is 1000
46 |         ),
47 |         query=query,
48 |     )
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     parser = argparse.ArgumentParser(description="example")
53 | 
54 |     parser.add_argument(
55 |         "--provider",
56 |         choices=["sqd", "hypersync"],
57 |         required=True,
58 |         help="Specify the provider ('sqd' or 'hypersync')",
59 |     )
60 | 
61 |     args = parser.parse_args()
62 | 
63 |     asyncio.run(main(args.provider))
--------------------------------------------------------------------------------
/python/cherry_core/svm_decode/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import List, Optional, Union, TypeAlias, Literal
2 | from dataclasses import dataclass
3 | 
4 | PrimitiveType: TypeAlias = Literal[
5 |     "i8", "i16", "i32", "i64", "i128", "u8", "u16", "u32", "u64", "u128", "bool"
6 | ]
7 | ElementType: TypeAlias = Union[
8 |     PrimitiveType, "DynType", "FixedArray", "Array", "Struct", "Enum", "Option"
9 | ]
10 | 
11 | 
12 | @dataclass
13 | class FixedArray:
14 |     element_type: ElementType
15 |     size: int
16 | 
17 | 
18 | @dataclass
19 | class Array:
20 |     element_type: ElementType
21 | 
22 | 
23 | @dataclass
24 | class Field:
25 |     name: str
26 |     element_type: ElementType
27 | 
28 | 
29 | @dataclass
30 | class Struct:
31 |     fields: List[Field]
32 | 
33 | 
34 | @dataclass
35 | class Variant:
36 |     name: str
37 |     element_type: Optional[ElementType]
38 | 
39 | 
40 | @dataclass
41 | class Enum:
42 |     variants: List[Variant]
43 | 
44 | 
45 | @dataclass
46 | class Option:
47 |     element_type: ElementType
48 | 
49 | 
50 | @dataclass
51 | class ParamInput:
52 |     name: str
53 |     param_type: ElementType
54 | 
55 | 
56 | @dataclass
57 | class InstructionSignature:
58 |     discriminator: Union[bytes, str]
59 |     params: List[ParamInput]
60 |     accounts_names: List[str]
61 | 
62 | 
63 | @dataclass
64 | class LogSignature:
65 |     params: List[ParamInput]
66 | 
67 | 
68 | class DynType:
69 |     I8: PrimitiveType = "i8"
70 |     I16: PrimitiveType = "i16"
71 |     I32: PrimitiveType = "i32"
72 |     I64: PrimitiveType = "i64"
73 |     I128: PrimitiveType = "i128"
74 |     U8: PrimitiveType = "u8"
75 |     U16: PrimitiveType = "u16"
76 |     U32: PrimitiveType = "u32"
77 |     U64: PrimitiveType = "u64"
78 |     U128: PrimitiveType = "u128"
79 |     Bool: PrimitiveType = "bool"
80 |     FixedArray = FixedArray
81 |     Array = Array
82 |     Struct = Struct
83 |     Enum = Enum
84 |     Option = Option
85 | 
--------------------------------------------------------------------------------
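For reference, a hypothetical construction of the types above (the discriminator bytes, parameter name, and account names are illustrative, not taken from a real program; per the `InstructionSignature` dataclass, the discriminator may also be given as a hex string):

from cherry_core.svm_decode import InstructionSignature, ParamInput, DynType

# illustrative signature: one u64 parameter and three named accounts
sig = InstructionSignature(
    discriminator=b"\x02\x00\x00\x00",
    params=[ParamInput(name="amount", param_type=DynType.U64)],
    accounts_names=["source", "destination", "authority"],
)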
/python/examples/cherry_ingest_erc20.py:
--------------------------------------------------------------------------------
1 | import cherry_core
2 | from cherry_core import ingest
3 | import asyncio
4 | import logging
5 | import os
6 | 
7 | logging.basicConfig(level=os.environ.get("LOGLEVEL", "DEBUG").upper())
8 | logger = logging.getLogger(__name__)
9 | 
10 | signature = "Transfer(address indexed from, address indexed to, uint256 amount)"
11 | topic0 = cherry_core.evm_signature_to_topic0(signature)
12 | contract_address = "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48"
13 | 
14 | 
15 | async def run(provider: ingest.ProviderConfig, query: ingest.Query):
16 |     stream = ingest.start_stream(provider, query)
17 | 
18 |     while True:
19 |         res = await stream.next()
20 |         if res is None:
21 |             break
22 | 
23 |         logger.info(res["blocks"].column("number"))
24 |         logger.debug(res)
25 | 
26 |         decoded = cherry_core.evm_decode_events(signature, res["logs"])
27 |         logger.debug(decoded)
28 | 
29 | 
30 | query = ingest.Query(
31 |     kind=ingest.QueryKind.EVM,
32 |     params=ingest.evm.Query(
33 |         from_block=20123123,
34 |         to_block=20123223,
35 |         logs=[
36 |             ingest.evm.LogRequest(
37 |                 address=[contract_address],
38 |                 topic0=[topic0],
39 |                 include_blocks=True,
40 |             )
41 |         ],
42 |         fields=ingest.evm.Fields(
43 |             block=ingest.evm.BlockFields(
44 |                 number=True,
45 |             ),
46 |             log=ingest.evm.LogFields(
47 |                 data=True,
48 |                 topic0=True,
49 |                 topic1=True,
50 |                 topic2=True,
51 |                 topic3=True,
52 |             ),
53 |         ),
54 |     ),
55 | )
56 | 
57 | print("running with sqd")
58 | asyncio.run(
59 |     run(
60 |         ingest.ProviderConfig(
61 |             kind=ingest.ProviderKind.SQD,
62 |             url="https://portal.sqd.dev/datasets/ethereum-mainnet",
63 |         ),
64 |         query=query,
65 |     )
66 | )
67 | 
68 | print("running with hypersync")
69 | asyncio.run(
70 |     run(
71 |         ingest.ProviderConfig(
72 |             kind=ingest.ProviderKind.HYPERSYNC,
73 |             url="https://eth.hypersync.xyz",
74 |         ),
75 |         query=query,
76 |     )
77 | )
--------------------------------------------------------------------------------
/python/examples/solana.py:
--------------------------------------------------------------------------------
1 | from cherry_core import ingest, base58_encode_bytes
2 | import asyncio
3 | 
4 | 
5 | async def run(provider: ingest.ProviderConfig, query: ingest.Query):
6 |     stream = ingest.start_stream(provider, query)
7 | 
8 |     while True:
9 |         res = await stream.next()
10 |         if res is None:
11 |             break
12 | 
13 |         # print(res)
14 | 
15 |         print(res["blocks"].column("slot"))
16 | 
17 |         for x in res["transactions"].column("signature"):
18 |             print(base58_encode_bytes(x.as_py()))
19 | 
20 |         print(res["instructions"].column("instruction_address"))
21 |         print(res["instructions"].column("transaction_index"))
22 | 
23 | 
24 | query = ingest.Query(
25 |     kind=ingest.QueryKind.SVM,
26 |     params=ingest.svm.Query(
27 |         from_block=332557668,
28 |         to_block=332557668,
29 |         include_all_blocks=True,
30 |         fields=ingest.svm.Fields(
31 |             block=ingest.svm.BlockFields(
32 |                 slot=True,
33 |                 hash=True,
34 |             ),
35 |             instruction=ingest.svm.InstructionFields(
36 |                 program_id=True,
37 |                 data=True,
38 |                 instruction_address=True,
39 |                 transaction_index=True,
40 |             ),
41 |             transaction=ingest.svm.TransactionFields(
42 |                 signature=True,
43 |                 transaction_index=True,
44 |             ),
45 |         ),
46 |         instructions=[
47 |             ingest.svm.InstructionRequest(
48 |                 program_id=["whirLbMiicVdio4qvUfM5KAg6Ct8VwpYzGff3uctyCc"],
49 |                 discriminator=["0xf8c6"],
50 |                 a1=["EV5Xoy9TQc4zXtqRHpjDefrxsQTi4Lm12KAE8axyBsdp"],
51 |                 # discriminator=[bytes([2, 0, 0, 0, 1, 0, 0, 0])],
52 |                 include_inner_instructions=True,
53 |                 include_transactions=True,
54 |                 is_committed=True,
55 |             )
56 |         ],
57 |     ),
58 | )
59 | 
60 | print("running with sqd")
61 | asyncio.run(
62 |     run(
63 |         ingest.ProviderConfig(
64 |             kind=ingest.ProviderKind.SQD,
65 |             url="https://portal.sqd.dev/datasets/solana-beta",
66 |         ),
67 |         query=query,
68 |     )
69 | )
--------------------------------------------------------------------------------
/python/src/ingest.rs:
--------------------------------------------------------------------------------
1 | use std::collections::BTreeMap;
2 | use std::pin::Pin;
3 | 
4 | use anyhow::{Context, Result};
5 | use arrow::{pyarrow::ToPyArrow, record_batch::RecordBatch};
6 | use baselib::ingest::{ProviderConfig, Query};
7 | use futures_lite::{Stream, StreamExt};
8 | use pyo3::prelude::*;
9 | 
10 | pub fn ingest_module(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
11 |     let submodule = PyModule::new(py, "ingest")?;
12 | 
13 |     submodule.add_function(wrap_pyfunction!(start_stream, m)?)?;
14 | 
15 |     m.add_submodule(&submodule)?;
16 | 
17 |     Ok(())
18 | }
19 | 
20 | #[pyclass]
21 | #[allow(clippy::type_complexity)]
22 | struct ResponseStream {
23 |     inner: Option<Pin<Box<dyn Stream<Item = Result<BTreeMap<String, RecordBatch>>> + Send + Sync>>>,
24 | }
25 | 
26 | #[pymethods]
27 | impl ResponseStream {
28 |     pub fn close(&mut self) {
29 |         self.inner.take();
30 |     }
31 | 
32 |     pub async fn next(&mut self) -> PyResult<Option<BTreeMap<String, PyObject>>> {
33 |         let inner = match self.inner.as_mut() {
34 |             Some(i) => i,
35 |             None => return Ok(None),
36 |         };
37 | 
38 |         let next: BTreeMap<String, RecordBatch> = match inner.next().await {
39 |             Some(n) => n.context("get next item from inner stream")?,
40 |             None => {
41 |                 self.inner = None;
42 |                 return Ok(None);
43 |             }
44 |         };
45 | 
46 |         let mut out = BTreeMap::new();
47 | 
48 |         for (table_name, batch) in next.into_iter() {
49 |             let batch =
50 |                 Python::with_gil(|py| batch.to_pyarrow(py).context("map result to pyarrow"))?;
51 | 
52 |             out.insert(table_name, batch);
53 |         }
54 | 
55 |         Ok(Some(out))
56 |     }
57 | }
58 | 
59 | #[pyfunction]
60 | fn start_stream(
61 |     provider_config: &Bound<'_, PyAny>,
62 |     query: &Bound<'_, PyAny>,
63 | ) -> PyResult<ResponseStream> {
64 |     let cfg: ProviderConfig = provider_config.extract().context("parse provider config")?;
65 |     let query: Query = query.extract().context("parse query")?;
66 | 
67 |     let inner = crate::TOKIO_RUNTIME.block_on(async move {
68 |         baselib::ingest::start_stream(cfg, query)
69 |             .await
70 |             .context("start stream")
71 |     })?;
72 | 
73 |     Ok(ResponseStream { inner: Some(inner) })
74 | }
--------------------------------------------------------------------------------
/python/examples/cryo_erc20.py:
--------------------------------------------------------------------------------
1 | from cryo import collect as cryo_collect
2 | import cherry_core
3 | import typing
4 | import polars
5 | import pyarrow
6 | 
7 | signature = "Transfer(address indexed from, address indexed to, uint256 amount)"
8 | topic0 = cherry_core.evm_signature_to_topic0(signature)
9 | contract_address = "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48"
10 | 
11 | # get filtered events from last 10 blocks
12 | data = cryo_collect(
13 |     datatype="logs",
14 |     blocks=["-10:"],
15 |     rpc="https://eth.rpc.hypersync.xyz",
16 |     output_format="polars",
17 |     contract=[contract_address],  # type: ignore[arg-type]
18 |     topic0=[topic0],  # type: ignore[arg-type]
19 |     hex=False,
20 | )
21 | 
22 | data = typing.cast(polars.DataFrame, data)
23 | 
24 | batches = data.to_arrow().to_batches()
25 | batch = pyarrow.concat_batches(batches)
26 | 
27 | # decode events based on the event signature.
28 | # This function automatically infers output types from the signature and can handle arbitrary levels
29 | # of nesting via tuples/lists for example this: https://github.com/steelcake/cherry-core/blob/21534e31ae2e33ae62514765f25d28259ed03129/core/src/tests.rs#L18
30 | decoded = cherry_core.evm_decode_events(signature, batch, allow_decode_fail=False)
31 | 
32 | # cast to decimal128 since polars can't ffi the Int256 physical type yet
33 | # https://github.com/pola-rs/polars/blob/main/crates/polars-arrow/src/ffi/array.rs#L26
34 | # https://github.com/pola-rs/polars/blob/main/crates/polars-arrow/src/util/macros.rs#L25
35 | #
36 | # This function is a helper function to do multiple cast operations at once. It casts
37 | # all decimal256 columns (the 'amount' column here) to 128-bit decimals in this case.
38 | decoded = cherry_core.cast_by_type(
39 |     decoded, pyarrow.decimal256(76, 0), pyarrow.decimal128(38, 0), allow_cast_fail=False
40 | )
41 | 
42 | # convert all binary columns to prefix hex string format like '0xabc'
43 | decoded = cherry_core.prefix_hex_encode(decoded)
44 | 
45 | decoded = polars.from_arrow(decoded)
46 | decoded = typing.cast(polars.DataFrame, decoded)
47 | encoded_batch = polars.from_arrow(cherry_core.prefix_hex_encode(batch))
48 | if isinstance(encoded_batch, polars.DataFrame):
49 |     decoded = decoded.hstack(encoded_batch)
50 | else:
51 |     raise ValueError("encoded_batch is not a polars.DataFrame")
52 | 
53 | print(decoded)
54 | 
55 | sum = decoded.get_column("amount").sum()
56 | print(f"total volume is {sum}")
--------------------------------------------------------------------------------
/python/cherry_core/ingest/__init__.py:
--------------------------------------------------------------------------------
1 | from typing import Dict, Optional
2 | from dataclasses import dataclass
3 | import pyarrow
4 | from enum import Enum
5 | from . import evm, svm
6 | import cherry_core.cherry_core as cc
7 | 
8 | 
9 | class ProviderKind(str, Enum):
10 |     SQD = "sqd"
11 |     HYPERSYNC = "hypersync"
12 |     YELLOWSTONE_GRPC = "yellowstone_grpc"
13 | 
14 | 
15 | class QueryKind(str, Enum):
16 |     EVM = "evm"
17 |     SVM = "svm"
18 | 
19 | 
20 | @dataclass
21 | class Query:
22 |     kind: QueryKind
23 |     params: evm.Query | svm.Query
24 | 
25 | 
26 | @dataclass
27 | class ProviderConfig:
28 |     kind: ProviderKind  # (Required) The selected provider.
29 |     url: Optional[str] = None  # (Optional) The URL of the provider.
30 |     bearer_token: Optional[str] = (
31 |         None  # (Optional) Optional authentication token for protected APIs.
32 |     )
33 |     max_num_retries: Optional[int] = (
34 |         None  # (Optional) Maximum number of retries for failed requests.
35 |     )
36 |     retry_backoff_ms: Optional[int] = (
37 |         None  # (Optional) How much the delay increases between retries, in milliseconds.
38 |     )
39 |     retry_base_ms: Optional[int] = None  # (Optional) Base retry delay in milliseconds.
40 |     retry_ceiling_ms: Optional[int] = (
41 |         None  # (Optional) Maximum retry delay in milliseconds.
42 |     )
43 |     req_timeout_millis: Optional[int] = (
44 |         None  # (Optional) Request timeout in milliseconds.
45 |     )
46 |     stop_on_head: bool = False  # (Optional) Whether to automatically stop when reaching the blockchain head or keep the pipeline running indefinitely.
47 |     head_poll_interval_millis: Optional[int] = (
48 |         None  # (Optional) How frequently (in milliseconds) to poll the blockchain head for updates.
49 |     )
50 |     buffer_size: Optional[int] = (
51 |         None  # (Optional) How many responses to store in a buffer before sending them to the consumer.
52 |     )
53 | 
54 | 
55 | class ResponseStream:
56 |     def __init__(self, inner):
57 |         self.inner = inner
58 | 
59 |     def close(self):
60 |         self.inner.close()
61 | 
62 |     async def next(self) -> Optional[Dict[str, pyarrow.RecordBatch]]:
63 |         return await self.inner.next()
64 | 
65 | 
66 | def start_stream(cfg: ProviderConfig, query: Query) -> ResponseStream:
67 |     inner = cc.ingest.start_stream(cfg, query)
68 |     return ResponseStream(inner)
--------------------------------------------------------------------------------
/python/examples/yellowstone_grpc.py:
--------------------------------------------------------------------------------
1 | from cherry_core import ingest, base58_encode
2 | import asyncio
3 | import polars
4 | from typing import cast
5 | import pyarrow as pa
6 | import os
7 | 
8 | 
9 | async def run(provider: ingest.ProviderConfig, query: ingest.Query):
10 |     stream = ingest.start_stream(provider, query)
11 | 
12 |     while True:
13 |         res = await stream.next()
14 |         if res is None:
15 |             break
16 | 
17 |         transactions = cast(polars.DataFrame, polars.from_arrow(res["transactions"]))
18 |         token_balances = cast(
19 |             polars.DataFrame, polars.from_arrow(res["token_balances"])
20 |         )
21 | 
22 |         token_balances = token_balances.join(
23 |             transactions, ["block_slot", "transaction_index"]
24 |         )
25 | 
26 |         for batch in token_balances.to_arrow().to_batches():
27 |             new_batch = batch
28 |             for i, col in enumerate(new_batch.columns):
29 |                 if pa.types.is_large_binary(col.type):
30 |                     new_batch = new_batch.set_column(
31 |                         i, new_batch.column_names[i], col.cast(target_type=pa.binary())
32 |                     )
33 | 
34 |             new_batch = base58_encode(new_batch)
35 | 
36 |             print(new_batch)
37 | 
38 | 
39 | query = ingest.Query(
40 |     kind=ingest.QueryKind.SVM,
41 |     params=ingest.svm.Query(
42 |         from_block=317617480,
43 |         token_balances=[
44 |             ingest.svm.TokenBalanceRequest(
45 |                 include_transactions=True,
46 |             )
47 |         ],
48 |         fields=ingest.svm.Fields(
49 |             token_balance=ingest.svm.TokenBalanceFields(
50 |                 block_slot=True,
51 |                 transaction_index=True,
52 |                 post_owner=True,
53 |                 pre_owner=True,
54 |                 post_amount=True,
55 |                 pre_amount=True,
56 |                 post_mint=True,
57 |                 account=True,
58 |             ),
59 |             transaction=ingest.svm.TransactionFields(
60 |                 block_slot=True,
61 |                 transaction_index=True,
62 |                 signature=True,
63 |                 err=True,
64 |             ),
65 |         ),
66 |     ),
67 | )
68 | 
69 | asyncio.run(
70 |     run(
71 |         ingest.ProviderConfig(
72 |             kind=ingest.ProviderKind.YELLOWSTONE_GRPC,
73 |             url=os.environ.get("YELLOWSTONE_GRPC_URL"),
74 |             bearer_token=os.environ.get("YELLOWSTONE_GRPC_TOKEN"),
75 |         ),
76 |         query=query,
77 |     )
78 | )
79 | 
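# A minimal early-exit sketch (assuming only the ResponseStream API defined in
# cherry_core/ingest/__init__.py; the batch budget and table name are
# illustrative): consume a bounded number of responses, then release the
# underlying stream explicitly with close().
async def run_bounded(provider: ingest.ProviderConfig, query: ingest.Query, max_batches: int = 10):
    stream = ingest.start_stream(provider, query)
    try:
        for _ in range(max_batches):
            res = await stream.next()
            if res is None:  # stream ended on its own
                break
            print(res["token_balances"].num_rows)
    finally:
        # close() drops the inner stream so the pipeline stops promptly
        stream.close()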
--------------------------------------------------------------------------------
/python/examples/svm_decode_log.py:
--------------------------------------------------------------------------------
1 | import pyarrow as pa
2 | import pyarrow.parquet as pq
3 | from pathlib import Path
4 | from cherry_core.svm_decode import (
5 |     LogSignature,
6 |     ParamInput,
7 |     DynType,
8 | )
9 | from cherry_core import svm_decode_logs
10 | 
11 | current_dir = Path(__file__).parent
12 | input_file = current_dir / "logs.parquet"
13 | output_file = current_dir / "decoded_logs.parquet"
14 | 
15 | print(f"Reading input file: {input_file}")
16 | print(f"Will save output to: {output_file}")
17 | 
18 | try:
19 |     table = pq.read_table(str(input_file))
20 | 
21 |     batch = table.to_batches()[0]
22 | 
23 |     signature = LogSignature(
24 |         params=[
25 |             ParamInput(
26 |                 name="whirlpool",
27 |                 param_type=DynType.FixedArray(DynType.U8, 32),
28 |             ),
29 |             ParamInput(
30 |                 name="a_to_b",
31 |                 param_type=DynType.Bool,
32 |             ),
33 |             ParamInput(
34 |                 name="pre_sqrt_price",
35 |                 param_type=DynType.U128,
36 |             ),
37 |             ParamInput(
38 |                 name="post_sqrt_price",
39 |                 param_type=DynType.U128,
40 |             ),
41 |             ParamInput(
42 |                 name="x",
43 |                 param_type=DynType.U64,
44 |             ),
45 |             ParamInput(
46 |                 name="input_amount",
47 |                 param_type=DynType.U64,
48 |             ),
49 |             ParamInput(
50 |                 name="output_amount",
51 |                 param_type=DynType.U64,
52 |             ),
53 |             ParamInput(
54 |                 name="input_transfer_fee",
55 |                 param_type=DynType.U64,
56 |             ),
57 |             ParamInput(
58 |                 name="output_transfer_fee",
59 |                 param_type=DynType.U64,
60 |             ),
61 |             ParamInput(
62 |                 name="lp_fee",
63 |                 param_type=DynType.U64,
64 |             ),
65 |             ParamInput(
66 |                 name="protocol_fee",
67 |                 param_type=DynType.U64,
68 |             ),
69 |         ],
70 |     )
71 | 
72 |     print("Decoding instruction batch...")
73 |     decoded_batch = svm_decode_logs(signature, batch, True)
74 | 
75 |     decoded_table = pa.Table.from_batches([decoded_batch])
76 | 
77 |     print("Saving decoded result...")
78 |     pq.write_table(decoded_table, str(output_file))
79 | 
80 |     print("Successfully decoded and saved the result!")
81 | 
82 | except Exception as e:
83 |     print(f"Error during decoding: {e}")
84 |     raise
85 | 
--------------------------------------------------------------------------------
/python/examples/cryo_erc20_datafusion.py:
--------------------------------------------------------------------------------
1 | from cryo import collect as cryo_collect
2 | import cherry_core
3 | import typing
4 | import polars
5 | import pyarrow
6 | 
7 | signature = "Transfer(address indexed from, address indexed to, uint256 amount)"
8 | topic0 = cherry_core.evm_signature_to_topic0(signature)
9 | contract_address = "0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48"
10 | 
11 | # get filtered events from last 10 blocks
12 | data = cryo_collect(
13 |     datatype="logs",
14 |     blocks=["-10:"],
15 |     rpc="https://eth.rpc.hypersync.xyz",
16 |     output_format="polars",
17 |     contract=[contract_address],  # type: ignore[arg-type]
18 |     topic0=[topic0],  # type: ignore[arg-type]
19 |     hex=False,
20 | )
21 | 
22 | data = typing.cast(polars.DataFrame, data)
23 | 
24 | batches = data.to_arrow().to_batches()
25 | batch = pyarrow.concat_batches(batches)
26 | 
27 | # cast large_binary columns to regular binary, not sure why all binary columns end up being large_binary
28 | for i, col in enumerate(batch.columns):
29 |     if pyarrow.types.is_large_binary(col.type):
30 |         batch = batch.set_column(
31 |             i, batch.column_names[i], col.cast(target_type=pyarrow.binary())
32 |         )
33 | 
34 | # decode events based on the event signature.
35 | # This function automatically infers output types from the signature and can handle arbitrary levels
36 | # of nesting via tuples/lists for example this: https://github.com/steelcake/cherry-core/blob/21534e31ae2e33ae62514765f25d28259ed03129/core/src/tests.rs#L18
37 | decoded = cherry_core.evm_decode_events(signature, batch, allow_decode_fail=False)
38 | 
39 | # cast to decimal128 since polars can't ffi the Int256 physical type yet
40 | # https://github.com/pola-rs/polars/blob/main/crates/polars-arrow/src/ffi/array.rs#L26
41 | # https://github.com/pola-rs/polars/blob/main/crates/polars-arrow/src/util/macros.rs#L25
42 | #
43 | # This function is a helper function to do multiple cast operations at once. It casts
44 | # the named column 'amount' to 128-bit decimals in this case.
45 | decoded = cherry_core.cast(
46 |     [("amount", pyarrow.decimal128(38, 0))], decoded, allow_cast_fail=False
47 | )
48 | 
49 | # convert all binary columns to prefix hex string format like '0xabc'
50 | decoded = cherry_core.prefix_hex_encode(decoded)
51 | 
52 | decoded = polars.from_arrow(decoded)
53 | decoded = typing.cast(polars.DataFrame, decoded)
54 | encoded_batch = polars.from_arrow(cherry_core.prefix_hex_encode(batch))
55 | if isinstance(encoded_batch, polars.DataFrame):
56 |     decoded = decoded.hstack(encoded_batch)
57 | else:
58 |     raise ValueError("encoded_batch is not a polars.DataFrame")
59 | 
60 | print(decoded)
61 | 
62 | sum = decoded.get_column("amount").sum()
63 | print(f"total volume is {sum}")
--------------------------------------------------------------------------------
/.github/workflows/publish-python.yaml:
--------------------------------------------------------------------------------
1 | name: publish-python
2 | 
3 | concurrency:
4 |   group: ${{ github.workflow }}-${{ github.ref }}
5 |   cancel-in-progress: true
6 | 
7 | on:
8 |   workflow_dispatch: null
9 | 
10 | permissions:
11 |   contents: read
12 | 
13 | jobs:
14 |   linux:
15 |     runs-on: ${{ matrix.platform.runner }}
16 |     strategy:
17 |       matrix:
18 |         platform:
19 |           - runner: ubuntu-latest
20 |             target: x86_64
21 |             python-architecture: x64
22 |           - runner: ubuntu-latest
23 |             target: aarch64
24 |             python-architecture: arm64
25 |         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
26 |     steps:
27 |       - uses: actions/checkout@v4
28 |       - uses: actions/setup-python@v5
29 |         with:
30 |           python-version: ${{ matrix.python-version }}
31 |           architecture: ${{ matrix.platform.python-architecture }}
32 |       - name: Build wheels
33 |         uses: PyO3/maturin-action@v1
34 |         with:
35 |           target: ${{ matrix.platform.target }}
36 |           args: --release --out dist --interpreter python${{ matrix.python-version }} --zig --manifest-path ./python/Cargo.toml
37 |           manylinux: auto
38 |           before-script-linux: sudo apt-get install -y capnproto libcapnp-dev
39 |       - name: Upload wheels
40 |         uses: actions/upload-artifact@v4
41 |         with:
42 |           name: wheels-linux-${{ matrix.platform.target }}-python${{ matrix.python-version }}
43 |           path: dist
44 | 
45 |   macos:
46 |     runs-on: ${{ matrix.platform.runner }}
47 |     strategy:
48 |       matrix:
49 |         platform:
50 |           - runner: macos-13
51 |             target: x86_64
52 |             python-architecture: x64
53 |           - runner: macos-14
54 |             target: aarch64
55 |             python-architecture: arm64
56 |         python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
57 |     steps:
58 |       - uses: actions/checkout@v4
59 |       - uses: actions/setup-python@v5
60 |         with:
61 |           python-version: ${{ matrix.python-version }}
62 |           architecture: ${{ matrix.platform.python-architecture }}
63 |       - name: capnproto
64 |         run: brew install capnp
65 |       - name: Build wheels
66 |         uses: PyO3/maturin-action@v1
67 |         with:
68 |           target: ${{ matrix.platform.target }}
69 |           args: --release --out dist --interpreter python${{ matrix.python-version }} --manifest-path ./python/Cargo.toml
70 |       - name: Upload wheels
71 |         uses: actions/upload-artifact@v4
72 |         with:
73 |           name: wheels-macos-${{ matrix.platform.target }}-python${{ matrix.python-version }}
74 |           path: dist
75 | 
76 |   sdist:
77 |     runs-on: ubuntu-latest
78 |     steps:
79 |       - uses: actions/checkout@v4
80 |       - name: Build sdist
81 |         uses: PyO3/maturin-action@v1
82 |         with:
83 |           command: sdist
84 |           args: --out dist --manifest-path ./python/Cargo.toml
85 |       - name: Upload sdist
86 |         uses: actions/upload-artifact@v4
87 |         with:
88 |           name: wheels-sdist
89 |           path: dist
90 | 
91 |   release:
92 |     name: Release
93 |     runs-on: ubuntu-latest
94 |     needs: [linux, macos, sdist]
95 |     steps:
96 |       - uses: actions/download-artifact@v4
97 |       - name: Publish to PyPI
98 |         uses: PyO3/maturin-action@v1
99 |         env:
100 |           MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
101 |         with:
102 |           command: upload
103 |           args: --non-interactive --skip-existing wheels-*/*
104 | 
--------------------------------------------------------------------------------
/python/cherry_core/__init__.py:
--------------------------------------------------------------------------------
1 | import cherry_core.cherry_core as cc
2 | from . import svm_decode
3 | from typing import Tuple
4 | import pyarrow
5 | 
6 | 
7 | def cast(
8 |     map: list[Tuple[str, pyarrow.DataType]],
9 |     data: pyarrow.RecordBatch,
10 |     allow_cast_fail: bool = False,
11 | ) -> pyarrow.RecordBatch:
12 |     return cc.cast(map, data, allow_cast_fail)
13 | 
14 | 
15 | def cast_schema(
16 |     map: list[Tuple[str, pyarrow.DataType]], schema: pyarrow.Schema
17 | ) -> pyarrow.Schema:
18 |     return cc.cast_schema(map, schema)
19 | 
20 | 
21 | def cast_by_type(
22 |     data: pyarrow.RecordBatch,
23 |     from_type: pyarrow.DataType,
24 |     to_type: pyarrow.DataType,
25 |     allow_cast_fail: bool = False,
26 | ) -> pyarrow.RecordBatch:
27 |     return cc.cast_by_type(data, from_type, to_type, allow_cast_fail)
28 | 
29 | 
30 | def cast_schema_by_type(
31 |     schema: pyarrow.Schema, from_type: pyarrow.DataType, to_type: pyarrow.DataType
32 | ) -> pyarrow.Schema:
33 |     return cc.cast_schema_by_type(schema, from_type, to_type)
34 | 
35 | 
36 | def base58_encode(data: pyarrow.RecordBatch) -> pyarrow.RecordBatch:
37 |     return cc.base58_encode(data)
38 | 
39 | 
40 | def hex_encode(data: pyarrow.RecordBatch) -> pyarrow.RecordBatch:
41 |     return cc.hex_encode(data)
42 | 
43 | 
44 | def prefix_hex_encode(data: pyarrow.RecordBatch) -> pyarrow.RecordBatch:
45 |     return cc.prefix_hex_encode(data)
46 | 
47 | 
48 | def base58_encode_column(col: pyarrow.Array) -> pyarrow.Array:
49 |     return cc.base58_encode_column(col)
50 | 
51 | 
52 | def hex_encode_column(col: pyarrow.Array) -> pyarrow.Array:
53 |     return cc.hex_encode_column(col)
54 | 
55 | 
56 | def prefix_hex_encode_column(col: pyarrow.Array) -> pyarrow.Array:
57 |     return cc.prefix_hex_encode_column(col)
58 | 
59 | 
60 | def base58_decode_column(col: pyarrow.Array) -> pyarrow.Array:
61 |     return cc.base58_decode_column(col)
62 | 
63 | 
64 | def hex_decode_column(col: pyarrow.Array) -> pyarrow.Array:
65 |     return cc.hex_decode_column(col)
66 | 
67 | 
68 | def prefix_hex_decode_column(col: pyarrow.Array) -> pyarrow.Array:
69 |     return cc.prefix_hex_decode_column(col)
70 | 
71 | 
72 | def u256_column_from_binary(col: pyarrow.Array) -> pyarrow.Array:
73 |     return cc.u256_column_from_binary(col)
74 | 
75 | 
76 | def u256_column_to_binary(col: pyarrow.Array) -> pyarrow.Array:
77 |     return cc.u256_column_to_binary(col)
78 | 
79 | 
80 | def u256_to_binary(data: pyarrow.RecordBatch) -> pyarrow.RecordBatch:
81 |     return cc.u256_to_binary(data)
82 | 
83 | 
84 | def svm_decode_instructions(
85 |     signature: svm_decode.InstructionSignature,
86 |     batch: pyarrow.RecordBatch,
87 |     allow_decode_fail: bool = False,
88 | ) -> pyarrow.RecordBatch:
89 |     return cc.svm_decode_instructions(signature, batch, allow_decode_fail)
90 | 
91 | 
92 | def svm_decode_logs(
93 |     signature: svm_decode.LogSignature,
94 |     batch: pyarrow.RecordBatch,
95 |     allow_decode_fail: bool = False,
96 | ) -> pyarrow.RecordBatch:
97 |     return cc.svm_decode_logs(signature, batch, allow_decode_fail)
98 | 
99 | 
100 | def instruction_signature_to_arrow_schema(
101 |     signature: svm_decode.InstructionSignature,
102 | ) -> pyarrow.Schema:
103 |     return cc.instruction_signature_to_arrow_schema(signature)
104 | 
105 | 
106 | def evm_decode_call_inputs(
107 |     signature: str, data: pyarrow.Array, allow_decode_fail: bool = False
108 | ) -> pyarrow.RecordBatch:
109 |     return cc.evm_decode_call_inputs(signature, data, allow_decode_fail)
110 | 
111 | 
112 | def evm_decode_call_outputs(
113 |     signature: str, data: pyarrow.Array, allow_decode_fail: bool = False
114 | ) -> pyarrow.RecordBatch:
115 |     return cc.evm_decode_call_outputs(signature, data, allow_decode_fail)
116 | 
117 | 
118 | def evm_decode_events(
119 |     signature: str, data: pyarrow.RecordBatch, allow_decode_fail: bool = False
120 | ) -> pyarrow.RecordBatch:
121 |     return cc.evm_decode_events(signature, data, allow_decode_fail)
122 | 
123 | 
124 | def evm_event_signature_to_arrow_schema(signature: str) -> pyarrow.Schema:
125 |     return cc.evm_event_signature_to_arrow_schema(signature)
126 | 
127 | 
128 | def evm_transaction_signature_to_arrow_schemas(
129 |     signature: str,
130 | ) -> Tuple[pyarrow.Schema, pyarrow.Schema]:
131 |     return cc.evm_transaction_signature_to_arrow_schemas(signature)
132 | 
133 | 
134 | def evm_signature_to_topic0(signature: str) -> str:
135 |     return cc.evm_signature_to_topic0(signature)
136 | 
137 | 
138 | def base58_encode_bytes(b: bytes) -> str:
139 |     return cc.base58_encode_bytes(b)
140 | 
141 | 
142 | def base58_decode_string(s: str) -> bytes:
143 |     return cc.base58_decode_string(s)
144 | 
--------------------------------------------------------------------------------
/core/src/tests.rs:
--------------------------------------------------------------------------------
1 | use std::sync::Arc;
2 | 
3 | use cherry_evm_decode::{decode_events, signature_to_topic0};
4 | use cherry_ingest::evm::{Address, Topic};
5 | use futures_lite::StreamExt;
6 | use hypersync_client::{self, ClientConfig, StreamConfig};
7 | 
8 | async fn erc20(cfg: cherry_ingest::ProviderConfig, query: cherry_ingest::Query) {
9 |     let signature = "Transfer(address indexed from, address indexed to, uint256 amount)";
10 |     let mut stream = cherry_ingest::start_stream(cfg, query).await.unwrap();
11 | 
12 |     while let Some(v) = stream.next().await {
13 |         let v = v.unwrap();
14 |         let decoded = decode_events(signature, v.get("logs").unwrap(), false).unwrap();
15 |         dbg!(decoded);
16 |     }
17 | }
18 | 
19 | fn erc20_query() -> cherry_ingest::Query {
20 |     let signature = "Transfer(address indexed from, address indexed to, uint256 amount)";
21 |     cherry_ingest::Query::Evm(cherry_ingest::evm::Query {
22 |         from_block: 18123123,
23 |         to_block: Some(18123222),
24 |         fields: cherry_ingest::evm::Fields::all(),
25 |         logs: vec![cherry_ingest::evm::LogRequest {
address: vec![Address(decode_hex( 27 | "0xdAC17F958D2ee523a2206206994597C13D831ec7", 28 | ))], 29 | topic0: vec![Topic(signature_to_topic0(signature).unwrap())], 30 | ..Default::default() 31 | }], 32 | ..Default::default() 33 | }) 34 | } 35 | 36 | #[tokio::test(flavor = "multi_thread")] 37 | #[ignore] 38 | async fn erc20_hypersync() { 39 | let provider = cherry_ingest::ProviderConfig::new(cherry_ingest::ProviderKind::Hypersync); 40 | let query = erc20_query(); 41 | erc20(provider, query).await; 42 | } 43 | 44 | #[tokio::test(flavor = "multi_thread")] 45 | #[ignore] 46 | async fn erc20_sqd() { 47 | let provider = cherry_ingest::ProviderConfig { 48 | url: Some("https://portal.sqd.dev/datasets/ethereum-mainnet".to_owned()), 49 | ..cherry_ingest::ProviderConfig::new(cherry_ingest::ProviderKind::Sqd) 50 | }; 51 | erc20(provider, erc20_query()).await; 52 | } 53 | 54 | fn decode_hex<const N: usize>(hex: &str) -> [u8; N] { 55 | let mut dst = [0; N]; 56 | faster_hex::hex_decode( 57 | hex.strip_prefix("0x").unwrap().as_bytes(), 58 | dst.as_mut_slice(), 59 | ) 60 | .unwrap(); 61 | dst 62 | } 63 | 64 | #[tokio::test(flavor = "multi_thread")] 65 | #[ignore] 66 | async fn decode_nested_list() { 67 | let client = hypersync_client::Client::new(ClientConfig { 68 | url: Some("https://10.hypersync.xyz".parse().unwrap()), 69 | ..Default::default() 70 | }) 71 | .unwrap(); 72 | let client = Arc::new(client); 73 | 74 | let signature = 75 | "ConfiguredQuests(address editor, uint256[] questIdList, (bool, bool, bool)[] details)"; 76 | 77 | let query = serde_json::from_value(serde_json::json!({ 78 | "from_block": 0, 79 | "logs": [{ 80 | "address": ["0xC5893DcAB9AD32Fa47923FEbdE89883C62BfFbd6"], 81 | "topics": [[hypersync_client::format::LogArgument::try_from(signature_to_topic0(signature).unwrap().as_slice()).unwrap()]] 82 | }], 83 | "field_selection": { 84 | "log": hypersync_client::schema::log() 85 | .fields 86 | .iter() 87 | .map(|f| f.name.clone()) 88 | .collect::<Vec<_>>(), 89 | } 90 | })).unwrap(); 91 | 92 | let res = client 93 | .collect_arrow(query, StreamConfig::default()) 94 | .await 95 | .unwrap(); 96 | 97 | let logs = res.data.logs.iter().map(polars_arrow_to_arrow_rs); 98 | 99 | for batch in logs { 100 | let decoded = decode_events(signature, &batch, false).unwrap(); 101 | 102 | dbg!(decoded); 103 | } 104 | } 105 | 106 | fn polars_arrow_to_arrow_rs( 107 | batch: &hypersync_client::ArrowBatch, 108 | ) -> arrow::record_batch::RecordBatch { 109 | let data_type = polars_arrow::datatypes::ArrowDataType::Struct(batch.schema.fields.clone()); 110 | let arr = polars_arrow::array::StructArray::new( 111 | data_type.clone(), 112 | batch.chunk.columns().to_vec(), 113 | None, 114 | ); 115 | 116 | let arr: arrow::ffi::FFI_ArrowArray = 117 | unsafe { std::mem::transmute(polars_arrow::ffi::export_array_to_c(Box::new(arr))) }; 118 | let schema: arrow::ffi::FFI_ArrowSchema = unsafe { 119 | std::mem::transmute(polars_arrow::ffi::export_field_to_c( 120 | &polars_arrow::datatypes::Field::new("", data_type, false), 121 | )) 122 | }; 123 | 124 | let mut arr_data = unsafe { arrow::ffi::from_ffi(arr, &schema).unwrap() }; 125 | 126 | arr_data.align_buffers(); 127 | 128 | let arr = arrow::array::StructArray::from(arr_data); 129 | 130 | arrow::record_batch::RecordBatch::from(arr) 131 | } 132 | -------------------------------------------------------------------------------- /python/examples/solana_all.py: -------------------------------------------------------------------------------- 1 | from cherry_core import ingest 2 | import pyarrow.parquet as pq 3 |
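# Editor's note (not part of the original example): ingest.start_stream
# returns an async stream whose next() yields either a dict mapping table
# names ("blocks", "transactions", "instructions", ...) to pyarrow
# RecordBatches, or None once the requested block range is exhausted.
# A minimal consumption sketch of that contract:
#
#     stream = ingest.start_stream(provider, query)
#     while (res := await stream.next()) is not None:
#         for name, batch in res.items():
#             ...  # each `batch` is a pyarrow.RecordBatch
#
# Note: this script writes to data/..., so the data/ directory must exist
# before running it.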
import pyarrow as pa 4 | import asyncio 5 | 6 | 7 | async def run(provider: ingest.ProviderConfig, query: ingest.Query): 8 | stream = ingest.start_stream(provider, query) 9 | 10 | while True: 11 | res = await stream.next() 12 | if res is None: 13 | break 14 | 15 | print(res.keys()) 16 | 17 | for key in res.keys(): 18 | table = res[key] 19 | table = pa.Table.from_batches([table]) 20 | pq.write_table(table, f"data/{key}_{start_block}_{end_block}.parquet") 21 | 22 | 23 | start_block = 317617480 24 | end_block = 317617580 25 | url = "https://portal.sqd.dev/datasets/solana-mainnet" 26 | 27 | query = ingest.Query( 28 | kind=ingest.QueryKind.SVM, 29 | params=ingest.svm.Query( 30 | from_block=start_block, 31 | to_block=end_block, 32 | include_all_blocks=True, 33 | fields=ingest.svm.Fields( 34 | block=ingest.svm.BlockFields( 35 | slot=True, 36 | hash=True, 37 | parent_slot=True, 38 | parent_hash=True, 39 | height=True, 40 | timestamp=True, 41 | ), 42 | instruction=ingest.svm.InstructionFields( 43 | block_slot=True, 44 | block_hash=True, 45 | transaction_index=True, 46 | instruction_address=True, 47 | program_id=True, 48 | a0=True, 49 | a1=True, 50 | a2=True, 51 | a3=True, 52 | a4=True, 53 | a5=True, 54 | a6=True, 55 | a7=True, 56 | a8=True, 57 | a9=True, 58 | rest_of_accounts=True, 59 | data=True, 60 | d1=True, 61 | d2=True, 62 | d4=True, 63 | d8=True, 64 | error=True, 65 | compute_units_consumed=True, 66 | is_committed=True, 67 | has_dropped_log_messages=True, 68 | ), 69 | transaction=ingest.svm.TransactionFields( 70 | block_slot=True, 71 | block_hash=True, 72 | transaction_index=True, 73 | signature=True, 74 | version=True, 75 | account_keys=True, 76 | address_table_lookups=True, 77 | num_readonly_signed_accounts=True, 78 | num_readonly_unsigned_accounts=True, 79 | num_required_signatures=True, 80 | # recent_blockhash=True, 81 | signatures=True, 82 | err=True, 83 | fee=True, 84 | compute_units_consumed=True, 85 | loaded_readonly_addresses=True, 86 | loaded_writable_addresses=True, 87 | fee_payer=True, 88 | has_dropped_log_messages=True, 89 | ), 90 | log=ingest.svm.LogFields( 91 | block_slot=True, 92 | block_hash=True, 93 | transaction_index=True, 94 | log_index=True, 95 | instruction_address=True, 96 | program_id=True, 97 | kind=True, 98 | message=True, 99 | ), 100 | balance=ingest.svm.BalanceFields( 101 | block_slot=True, 102 | block_hash=True, 103 | transaction_index=True, 104 | account=True, 105 | pre=True, 106 | post=True, 107 | ), 108 | token_balance=ingest.svm.TokenBalanceFields( 109 | block_slot=True, 110 | block_hash=True, 111 | transaction_index=True, 112 | account=True, 113 | pre_mint=True, 114 | post_mint=True, 115 | pre_decimals=True, 116 | post_decimals=True, 117 | pre_program_id=True, 118 | post_program_id=True, 119 | pre_owner=True, 120 | post_owner=True, 121 | pre_amount=True, 122 | post_amount=True, 123 | ), 124 | reward=ingest.svm.RewardFields( 125 | block_slot=True, 126 | block_hash=True, 127 | pubkey=True, 128 | lamports=True, 129 | post_balance=True, 130 | reward_type=True, 131 | commission=True, 132 | ), 133 | ), 134 | instructions=[ingest.svm.InstructionRequest()], 135 | transactions=[ingest.svm.TransactionRequest()], 136 | logs=[ingest.svm.LogRequest()], 137 | balances=[ingest.svm.BalanceRequest()], 138 | token_balances=[ingest.svm.TokenBalanceRequest()], 139 | rewards=[ingest.svm.RewardRequest()], 140 | ), 141 | ) 142 | 143 | print("running with sqd") 144 | asyncio.run( 145 | run( 146 | ingest.ProviderConfig( 147 | kind=ingest.ProviderKind.SQD, 148 | url=url, 149 | ), 150 
| query=query, 151 | ) 152 | ) 153 | -------------------------------------------------------------------------------- /python/cherry_core/ingest/evm.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from dataclasses import dataclass, field 3 | 4 | 5 | @dataclass 6 | class TransactionRequest: 7 | from_: list[str] = field(default_factory=list) 8 | to: list[str] = field(default_factory=list) 9 | sighash: list[str] = field(default_factory=list) 10 | status: list[int] = field(default_factory=list) 11 | type_: list[int] = field(default_factory=list) 12 | contract_deployment_address: list[str] = field(default_factory=list) 13 | hash: list[str] = field(default_factory=list) 14 | include_logs: bool = False 15 | include_traces: bool = False 16 | include_blocks: bool = False 17 | 18 | 19 | @dataclass 20 | class LogRequest: 21 | address: list[str] = field(default_factory=list) 22 | topic0: list[str] = field(default_factory=list) 23 | topic1: list[str] = field(default_factory=list) 24 | topic2: list[str] = field(default_factory=list) 25 | topic3: list[str] = field(default_factory=list) 26 | include_transactions: bool = False 27 | include_transaction_logs: bool = False 28 | include_transaction_traces: bool = False 29 | include_blocks: bool = False 30 | 31 | 32 | @dataclass 33 | class TraceRequest: 34 | from_: list[str] = field(default_factory=list) 35 | to: list[str] = field(default_factory=list) 36 | address: list[str] = field(default_factory=list) 37 | call_type: list[str] = field(default_factory=list) 38 | reward_type: list[str] = field(default_factory=list) 39 | type_: list[str] = field(default_factory=list) 40 | sighash: list[str] = field(default_factory=list) 41 | author: list[str] = field(default_factory=list) 42 | include_transactions: bool = False 43 | include_transaction_logs: bool = False 44 | include_transaction_traces: bool = False 45 | include_blocks: bool = False 46 | 47 | 48 | @dataclass 49 | class BlockFields: 50 | number: bool = False 51 | hash: bool = False 52 | parent_hash: bool = False 53 | nonce: bool = False 54 | sha3_uncles: bool = False 55 | logs_bloom: bool = False 56 | transactions_root: bool = False 57 | state_root: bool = False 58 | receipts_root: bool = False 59 | miner: bool = False 60 | difficulty: bool = False 61 | total_difficulty: bool = False 62 | extra_data: bool = False 63 | size: bool = False 64 | gas_limit: bool = False 65 | gas_used: bool = False 66 | timestamp: bool = False 67 | uncles: bool = False 68 | base_fee_per_gas: bool = False 69 | blob_gas_used: bool = False 70 | excess_blob_gas: bool = False 71 | parent_beacon_block_root: bool = False 72 | withdrawals_root: bool = False 73 | withdrawals: bool = False 74 | l1_block_number: bool = False 75 | send_count: bool = False 76 | send_root: bool = False 77 | mix_hash: bool = False 78 | 79 | 80 | @dataclass 81 | class TransactionFields: 82 | block_hash: bool = False 83 | block_number: bool = False 84 | from_: bool = False 85 | gas: bool = False 86 | gas_price: bool = False 87 | hash: bool = False 88 | input: bool = False 89 | nonce: bool = False 90 | to: bool = False 91 | transaction_index: bool = False 92 | value: bool = False 93 | v: bool = False 94 | r: bool = False 95 | s: bool = False 96 | max_priority_fee_per_gas: bool = False 97 | max_fee_per_gas: bool = False 98 | chain_id: bool = False 99 | cumulative_gas_used: bool = False 100 | effective_gas_price: bool = False 101 | gas_used: bool = False 102 | contract_address: bool = False 103 | 
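    # Editor's note (assumption about the upstream schema, not original text):
    # the remaining selectors below are rollup- and blob-specific receipt
    # columns (l1_* for OP-stack/Arbitrum style fee data, blob_* for
    # EIP-4844); they only select columns, so enabling one on a chain that
    # lacks the data should yield an all-null column rather than an error.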
logs_bloom: bool = False 104 | type_: bool = False 105 | root: bool = False 106 | status: bool = False 107 | sighash: bool = False 108 | y_parity: bool = False 109 | access_list: bool = False 110 | l1_fee: bool = False 111 | l1_gas_price: bool = False 112 | l1_fee_scalar: bool = False 113 | gas_used_for_l1: bool = False 114 | max_fee_per_blob_gas: bool = False 115 | blob_versioned_hashes: bool = False 116 | deposit_nonce: bool = False 117 | blob_gas_price: bool = False 118 | deposit_receipt_version: bool = False 119 | blob_gas_used: bool = False 120 | l1_base_fee_scalar: bool = False 121 | l1_blob_base_fee: bool = False 122 | l1_blob_base_fee_scalar: bool = False 123 | l1_block_number: bool = False 124 | mint: bool = False 125 | source_hash: bool = False 126 | 127 | 128 | @dataclass 129 | class LogFields: 130 | removed: bool = False 131 | log_index: bool = False 132 | transaction_index: bool = False 133 | transaction_hash: bool = False 134 | block_hash: bool = False 135 | block_number: bool = False 136 | address: bool = False 137 | data: bool = False 138 | topic0: bool = False 139 | topic1: bool = False 140 | topic2: bool = False 141 | topic3: bool = False 142 | 143 | 144 | @dataclass 145 | class TraceFields: 146 | from_: bool = False 147 | to: bool = False 148 | call_type: bool = False 149 | gas: bool = False 150 | input: bool = False 151 | init: bool = False 152 | value: bool = False 153 | author: bool = False 154 | reward_type: bool = False 155 | block_hash: bool = False 156 | block_number: bool = False 157 | address: bool = False 158 | code: bool = False 159 | gas_used: bool = False 160 | output: bool = False 161 | subtraces: bool = False 162 | trace_address: bool = False 163 | transaction_hash: bool = False 164 | transaction_position: bool = False 165 | type_: bool = False 166 | error: bool = False 167 | sighash: bool = False 168 | action_address: bool = False 169 | balance: bool = False 170 | refund_address: bool = False 171 | 172 | 173 | @dataclass 174 | class Fields: 175 | block: BlockFields = field(default_factory=BlockFields) 176 | transaction: TransactionFields = field(default_factory=TransactionFields) 177 | log: LogFields = field(default_factory=LogFields) 178 | trace: TraceFields = field(default_factory=TraceFields) 179 | 180 | 181 | @dataclass 182 | class Query: 183 | from_block: int = 0 184 | to_block: Optional[int] = None 185 | include_all_blocks: bool = False 186 | transactions: list[TransactionRequest] = field(default_factory=list) 187 | logs: list[LogRequest] = field(default_factory=list) 188 | traces: list[TraceRequest] = field(default_factory=list) 189 | fields: Fields = field(default_factory=Fields) 190 | -------------------------------------------------------------------------------- /python/cherry_core/ingest/svm.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Union 2 | from dataclasses import dataclass, field 3 | from enum import Enum 4 | 5 | 6 | @dataclass 7 | class InstructionFields: 8 | block_slot: bool = False 9 | block_hash: bool = False 10 | transaction_index: bool = False 11 | instruction_address: bool = False 12 | program_id: bool = False 13 | a0: bool = False 14 | a1: bool = False 15 | a2: bool = False 16 | a3: bool = False 17 | a4: bool = False 18 | a5: bool = False 19 | a6: bool = False 20 | a7: bool = False 21 | a8: bool = False 22 | a9: bool = False 23 | rest_of_accounts: bool = False 24 | data: bool = False 25 | d1: bool = False 26 | d2: bool = False 27 | d4: bool = False 28 | d8: 
bool = False 29 | error: bool = False 30 | compute_units_consumed: bool = False 31 | is_committed: bool = False 32 | has_dropped_log_messages: bool = False 33 | 34 | 35 | @dataclass 36 | class TransactionFields: 37 | block_slot: bool = False 38 | block_hash: bool = False 39 | transaction_index: bool = False 40 | signature: bool = False 41 | version: bool = False 42 | account_keys: bool = False 43 | address_table_lookups: bool = False 44 | num_readonly_signed_accounts: bool = False 45 | num_readonly_unsigned_accounts: bool = False 46 | num_required_signatures: bool = False 47 | recent_blockhash: bool = False 48 | signatures: bool = False 49 | err: bool = False 50 | fee: bool = False 51 | compute_units_consumed: bool = False 52 | loaded_readonly_addresses: bool = False 53 | loaded_writable_addresses: bool = False 54 | fee_payer: bool = False 55 | has_dropped_log_messages: bool = False 56 | 57 | 58 | @dataclass 59 | class LogFields: 60 | block_slot: bool = False 61 | block_hash: bool = False 62 | transaction_index: bool = False 63 | log_index: bool = False 64 | instruction_address: bool = False 65 | program_id: bool = False 66 | kind: bool = False 67 | message: bool = False 68 | 69 | 70 | @dataclass 71 | class BalanceFields: 72 | block_slot: bool = False 73 | block_hash: bool = False 74 | transaction_index: bool = False 75 | account: bool = False 76 | pre: bool = False 77 | post: bool = False 78 | 79 | 80 | @dataclass 81 | class TokenBalanceFields: 82 | block_slot: bool = False 83 | block_hash: bool = False 84 | transaction_index: bool = False 85 | account: bool = False 86 | pre_mint: bool = False 87 | post_mint: bool = False 88 | pre_decimals: bool = False 89 | post_decimals: bool = False 90 | pre_program_id: bool = False 91 | post_program_id: bool = False 92 | pre_owner: bool = False 93 | post_owner: bool = False 94 | pre_amount: bool = False 95 | post_amount: bool = False 96 | 97 | 98 | @dataclass 99 | class RewardFields: 100 | block_slot: bool = False 101 | block_hash: bool = False 102 | pubkey: bool = False 103 | lamports: bool = False 104 | post_balance: bool = False 105 | reward_type: bool = False 106 | commission: bool = False 107 | 108 | 109 | @dataclass 110 | class BlockFields: 111 | slot: bool = False 112 | hash: bool = False 113 | parent_slot: bool = False 114 | parent_hash: bool = False 115 | height: bool = False 116 | timestamp: bool = False 117 | 118 | 119 | @dataclass 120 | class Fields: 121 | instruction: InstructionFields = field(default_factory=InstructionFields) 122 | transaction: TransactionFields = field(default_factory=TransactionFields) 123 | log: LogFields = field(default_factory=LogFields) 124 | balance: BalanceFields = field(default_factory=BalanceFields) 125 | token_balance: TokenBalanceFields = field(default_factory=TokenBalanceFields) 126 | reward: RewardFields = field(default_factory=RewardFields) 127 | block: BlockFields = field(default_factory=BlockFields) 128 | 129 | 130 | @dataclass 131 | class InstructionRequest: 132 | program_id: list[str] = field(default_factory=list) 133 | discriminator: list[Union[bytes, str]] = field(default_factory=list) 134 | d1: list[Union[bytes, str]] = field(default_factory=list) 135 | d2: list[Union[bytes, str]] = field(default_factory=list) 136 | d3: list[Union[bytes, str]] = field(default_factory=list) 137 | d4: list[Union[bytes, str]] = field(default_factory=list) 138 | d8: list[Union[bytes, str]] = field(default_factory=list) 139 | a0: list[str] = field(default_factory=list) 140 | a1: list[str] = field(default_factory=list) 
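    # Editor's note: a0..a9 filter on the first ten account slots of an
    # instruction (mirroring the a0..a9 columns in InstructionFields above),
    # and d1/d2/d4/d8 filter on 1/2/4/8-byte prefixes of the instruction
    # data -- d8 is the natural match for Anchor-style 8-byte discriminators.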
141 | a2: list[str] = field(default_factory=list) 142 | a3: list[str] = field(default_factory=list) 143 | a4: list[str] = field(default_factory=list) 144 | a5: list[str] = field(default_factory=list) 145 | a6: list[str] = field(default_factory=list) 146 | a7: list[str] = field(default_factory=list) 147 | a8: list[str] = field(default_factory=list) 148 | a9: list[str] = field(default_factory=list) 149 | is_committed: bool = False 150 | include_transactions: bool = False 151 | include_transaction_token_balances: bool = False 152 | include_logs: bool = False 153 | include_inner_instructions: bool = False 154 | include_blocks: bool = True 155 | 156 | 157 | @dataclass 158 | class TransactionRequest: 159 | fee_payer: list[str] = field(default_factory=list) 160 | include_instructions: bool = False 161 | include_logs: bool = False 162 | include_blocks: bool = False 163 | 164 | 165 | class LogKind(str, Enum): 166 | LOG = "log" 167 | DATA = "data" 168 | OTHER = "other" 169 | 170 | 171 | @dataclass 172 | class LogRequest: 173 | program_id: list[str] = field(default_factory=list) 174 | kind: list[LogKind] = field( 175 | default_factory=list 176 | ) # Assuming LogKind is represented as a string 177 | include_transactions: bool = False 178 | include_instructions: bool = False 179 | include_blocks: bool = False 180 | 181 | 182 | @dataclass 183 | class BalanceRequest: 184 | account: list[str] = field(default_factory=list) 185 | include_transactions: bool = False 186 | include_transaction_instructions: bool = False 187 | include_blocks: bool = False 188 | 189 | 190 | @dataclass 191 | class TokenBalanceRequest: 192 | account: list[str] = field(default_factory=list) 193 | pre_program_id: list[str] = field(default_factory=list) 194 | post_program_id: list[str] = field(default_factory=list) 195 | pre_mint: list[str] = field(default_factory=list) 196 | post_mint: list[str] = field(default_factory=list) 197 | pre_owner: list[str] = field(default_factory=list) 198 | post_owner: list[str] = field(default_factory=list) 199 | include_transactions: bool = False 200 | include_transaction_instructions: bool = False 201 | include_blocks: bool = False 202 | 203 | 204 | @dataclass 205 | class RewardRequest: 206 | pubkey: list[str] = field(default_factory=list) 207 | include_blocks: bool = False 208 | 209 | 210 | @dataclass 211 | class Query: 212 | from_block: int = 0 213 | to_block: Optional[int] = None 214 | include_all_blocks: bool = False 215 | fields: Fields = field(default_factory=Fields) 216 | instructions: list[InstructionRequest] = field(default_factory=list) 217 | transactions: list[TransactionRequest] = field(default_factory=list) 218 | logs: list[LogRequest] = field(default_factory=list) 219 | balances: list[BalanceRequest] = field(default_factory=list) 220 | token_balances: list[TokenBalanceRequest] = field(default_factory=list) 221 | rewards: list[RewardRequest] = field(default_factory=list) 222 | -------------------------------------------------------------------------------- /ingest/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::should_implement_trait)] 2 | #![allow(clippy::field_reassign_with_default)] 3 | 4 | use std::{collections::BTreeMap, pin::Pin, sync::Arc}; 5 | 6 | use anyhow::{anyhow, Context, Result}; 7 | use arrow::record_batch::RecordBatch; 8 | use futures_lite::{Stream, StreamExt}; 9 | use provider::common::{evm_query_to_generic, svm_query_to_generic}; 10 | use serde::de::DeserializeOwned; 11 | 12 | pub mod evm; 13 | mod 
provider; 14 | mod rayon_async; 15 | pub mod svm; 16 | 17 | #[derive(Debug, Clone)] 18 | pub enum Query { 19 | Evm(evm::Query), 20 | Svm(svm::Query), 21 | } 22 | 23 | #[cfg(feature = "pyo3")] 24 | impl<'py> pyo3::FromPyObject<'py> for Query { 25 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 26 | use pyo3::types::PyAnyMethods; 27 | 28 | let kind = ob.getattr("kind").context("get kind attribute")?; 29 | let kind: &str = kind.extract().context("kind as str")?; 30 | 31 | let query = ob.getattr("params").context("get params attribute")?; 32 | 33 | match kind { 34 | "evm" => Ok(Self::Evm(query.extract().context("parse query")?)), 35 | "svm" => Ok(Self::Svm(query.extract().context("parse query")?)), 36 | _ => Err(anyhow!("unknown query kind: {}", kind).into()), 37 | } 38 | } 39 | } 40 | 41 | #[derive(Debug, Clone)] 42 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 43 | pub struct ProviderConfig { 44 | pub kind: ProviderKind, 45 | pub url: Option<String>, 46 | pub bearer_token: Option<String>, 47 | pub max_num_retries: Option<usize>, 48 | pub retry_backoff_ms: Option<u64>, 49 | pub retry_base_ms: Option<u64>, 50 | pub retry_ceiling_ms: Option<u64>, 51 | pub req_timeout_millis: Option<u64>, 52 | pub stop_on_head: bool, 53 | pub head_poll_interval_millis: Option<u64>, 54 | pub buffer_size: Option<usize>, 55 | } 56 | 57 | impl ProviderConfig { 58 | pub fn new(kind: ProviderKind) -> Self { 59 | Self { 60 | kind, 61 | url: None, 62 | bearer_token: None, 63 | max_num_retries: None, 64 | retry_backoff_ms: None, 65 | retry_base_ms: None, 66 | retry_ceiling_ms: None, 67 | req_timeout_millis: None, 68 | stop_on_head: false, 69 | head_poll_interval_millis: None, 70 | buffer_size: None, 71 | } 72 | } 73 | } 74 | 75 | #[derive(Debug, Clone, Copy)] 76 | pub enum ProviderKind { 77 | Sqd, 78 | Hypersync, 79 | } 80 | 81 | #[cfg(feature = "pyo3")] 82 | impl<'py> pyo3::FromPyObject<'py> for ProviderKind { 83 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 84 | use pyo3::types::PyAnyMethods; 85 | 86 | let out: &str = ob.extract().context("read as string")?; 87 | 88 | match out { 89 | "sqd" => Ok(Self::Sqd), 90 | "hypersync" => Ok(Self::Hypersync), 91 | _ => Err(anyhow!("unknown provider kind: {}", out).into()), 92 | } 93 | } 94 | } 95 | 96 | type DataStream = Pin<Box<dyn Stream<Item = Result<BTreeMap<String, RecordBatch>>> + Send + Sync>>; 97 | 98 | fn make_req_fields<F: DeserializeOwned>(query: &cherry_query::Query) -> Result<F> { 99 | let mut req_fields_query = query.clone(); 100 | req_fields_query 101 | .add_request_and_include_fields() 102 | .context("add req and include fields")?; 103 | 104 | let fields = req_fields_query 105 | .fields 106 | .into_iter() 107 | .map(|(k, v)| { 108 | ( 109 | k.strip_suffix('s').unwrap().to_owned(), 110 | v.into_iter() 111 | .map(|v| (v, true)) 112 | .collect::<BTreeMap<_, _>>(), 113 | ) 114 | }) 115 | .collect::<BTreeMap<_, _>>(); 116 | 117 | Ok(serde_json::from_value(serde_json::to_value(&fields).unwrap()).unwrap()) 118 | } 119 | 120 | pub async fn start_stream(provider_config: ProviderConfig, mut query: Query) -> Result<DataStream> { 121 | let generic_query = match &mut query { 122 | Query::Evm(evm_query) => { 123 | let generic_query = evm_query_to_generic(evm_query); 124 | 125 | evm_query.fields = make_req_fields(&generic_query).context("make req fields")?; 126 | 127 | generic_query 128 | } 129 | Query::Svm(svm_query) => { 130 | let generic_query = svm_query_to_generic(svm_query); 131 | 132 | svm_query.fields = make_req_fields(&generic_query).context("make req fields")?; 133 | 134 | generic_query 135 | } 136 | }; 137 | let generic_query = Arc::new(generic_query); 138 | 139 | let stream
= match provider_config.kind { 140 | ProviderKind::Sqd => { 141 | provider::sqd::start_stream(provider_config, query).context("start sqd stream")? 142 | } 143 | ProviderKind::Hypersync => provider::hypersync::start_stream(provider_config, query) 144 | .await 145 | .context("start hypersync stream")?, 146 | }; 147 | 148 | let stream = stream.then(move |res| { 149 | let generic_query = Arc::clone(&generic_query); 150 | async { 151 | rayon_async::spawn(move || { 152 | res.and_then(move |data| { 153 | let data = cherry_query::run_query(&data, &generic_query) 154 | .context("run local query")?; 155 | Ok(data) 156 | }) 157 | }) 158 | .await 159 | .unwrap() 160 | } 161 | }); 162 | 163 | Ok(Box::pin(stream)) 164 | } 165 | 166 | #[cfg(test)] 167 | mod tests { 168 | 169 | use super::*; 170 | use crate::svm::*; 171 | use parquet::arrow::ArrowWriter; 172 | use std::fs::File; 173 | 174 | #[tokio::test] 175 | #[ignore] 176 | async fn simple_svm_start_stream() { 177 | let mut provider_config = ProviderConfig::new(ProviderKind::Sqd); 178 | provider_config.url = Some("https://portal.sqd.dev/datasets/solana-mainnet".to_string()); 179 | 180 | let program_id = "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"; 181 | let program_id: [u8; 32] = bs58::decode(program_id) 182 | .into_vec() 183 | .unwrap() 184 | .try_into() 185 | .unwrap(); 186 | let program_id = Address(program_id); 187 | 188 | let query = crate::Query::Svm(svm::Query { 189 | from_block: 329443000, 190 | to_block: Some(329443000), 191 | include_all_blocks: false, 192 | fields: Fields { 193 | instruction: InstructionFields::all(), 194 | transaction: TransactionFields::default(), 195 | log: LogFields::default(), 196 | balance: BalanceFields::default(), 197 | token_balance: TokenBalanceFields::default(), 198 | reward: RewardFields::default(), 199 | block: BlockFields::default(), 200 | }, 201 | instructions: vec![ 202 | // InstructionRequest::default() , 203 | InstructionRequest { 204 | program_id: vec![program_id], 205 | discriminator: vec![Data(vec![12, 96, 49, 128, 22])], 206 | ..Default::default() 207 | }, 208 | ], 209 | transactions: vec![], 210 | logs: vec![], 211 | balances: vec![], 212 | token_balances: vec![], 213 | rewards: vec![], 214 | }); 215 | let mut stream = start_stream(provider_config, query).await.unwrap(); 216 | let data = stream.next().await.unwrap().unwrap(); 217 | for (k, v) in data.into_iter() { 218 | let mut file = File::create(format!("{}.parquet", k)).unwrap(); 219 | let mut writer = ArrowWriter::try_new(&mut file, v.schema(), None).unwrap(); 220 | writer.write(&v).unwrap(); 221 | writer.close().unwrap(); 222 | } 223 | } 224 | } 225 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /python/LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /ingest/src/evm.rs: -------------------------------------------------------------------------------- 1 | use anyhow::Context; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | #[derive(Default, Debug, Clone)] 5 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 6 | pub struct Query { 7 | pub from_block: u64, 8 | pub to_block: Option<u64>, 9 | pub include_all_blocks: bool, 10 | pub transactions: Vec<TransactionRequest>, 11 | pub logs: Vec<LogRequest>, 12 | pub traces: Vec<TraceRequest>, 13 | pub fields: Fields, 14 | } 15 | 16 | #[derive(Debug, Clone, Copy)] 17 | pub struct Hash(pub [u8; 32]); 18 | 19 | #[derive(Debug, Clone, Copy)] 20 | pub struct Address(pub [u8; 20]); 21 | 22 | #[derive(Debug, Clone, Copy)] 23 | pub struct Sighash(pub [u8; 4]); 24 | 25 | #[derive(Debug, Clone, Copy)] 26 | pub struct Topic(pub [u8; 32]); 27 | 28 | #[cfg(feature = "pyo3")] 29 | fn extract_hex<const N: usize>(ob: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult<[u8; N]> { 30 | use pyo3::types::PyAnyMethods; 31 | 32 | let s: &str = ob.extract()?; 33 | let s = s.strip_prefix("0x").context("strip 0x prefix")?; 34 | let mut out = [0; N]; 35 | faster_hex::hex_decode(s.as_bytes(), &mut out).context("decode hex")?; 36 | 37 | Ok(out) 38 | } 39 | 40 | #[cfg(feature = "pyo3")] 41 | impl<'py> pyo3::FromPyObject<'py> for Hash { 42 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 43 | let out = extract_hex(ob)?; 44 | Ok(Self(out)) 45 | } 46 | } 47 | 48 | #[cfg(feature = "pyo3")] 49 | impl<'py> pyo3::FromPyObject<'py> for Address { 50 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 51 | let out = extract_hex(ob)?; 52 | Ok(Self(out)) 53 | } 54 | } 55 | 56 | #[cfg(feature = "pyo3")] 57 | impl<'py> pyo3::FromPyObject<'py> for Sighash { 58 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 59 | let out = extract_hex(ob)?; 60 | Ok(Self(out)) 61 | } 62 | } 63 | 64 | #[cfg(feature = "pyo3")] 65 | impl<'py> pyo3::FromPyObject<'py> for Topic { 66 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 67 | let out = extract_hex(ob)?; 68 | Ok(Self(out)) 69 | } 70 | } 71 | 72 | // #[derive(Default, Debug, Clone)] 73 | // pub struct BlockRequest { 74 | // pub hash: Vec<Hash>, 75 | // pub miner: Vec<Address>, 76 | // } 77 |
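// Editor's note (illustrative sketch, not part of the original source): the
// request structs below act as filters; an empty Vec puts no constraint on
// that column, and the include_* flags pull in rows from related tables.
// Construction typically goes through ..Default::default(), as in
// core/src/tests.rs:
//
//     let req = LogRequest {
//         address: vec![Address(decode_hex("0xdAC17F958D2ee523a2206206994597C13D831ec7"))],
//         topic0: vec![Topic(signature_to_topic0(signature).unwrap())],
//         ..Default::default()
//     };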
78 | #[derive(Default, Debug, Clone)] 79 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 80 | pub struct TransactionRequest { 81 | pub from_: Vec<Address>, 82 | pub to: Vec<Address>, 83 | pub sighash: Vec<Sighash>, 84 | pub status: Vec<u8>, 85 | pub type_: Vec<u8>, 86 | pub contract_deployment_address: Vec<Address>, 87 | pub hash: Vec<Hash>, 88 | pub include_logs: bool, 89 | pub include_traces: bool, 90 | pub include_blocks: bool, 91 | } 92 | 93 | #[derive(Default, Debug, Clone)] 94 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 95 | pub struct LogRequest { 96 | pub address: Vec<Address>, 97 | pub topic0: Vec<Topic>, 98 | pub topic1: Vec<Topic>, 99 | pub topic2: Vec<Topic>, 100 | pub topic3: Vec<Topic>, 101 | pub include_transactions: bool, 102 | pub include_transaction_logs: bool, 103 | pub include_transaction_traces: bool, 104 | pub include_blocks: bool, 105 | } 106 | 107 | #[derive(Default, Debug, Clone)] 108 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 109 | pub struct TraceRequest { 110 | pub from_: Vec<Address>, 111 | pub to: Vec<Address>, 112 | pub address: Vec<Address>, 113 | pub call_type: Vec<String>, 114 | pub reward_type: Vec<String>, 115 | pub type_: Vec<String>, 116 | pub sighash: Vec<Sighash>, 117 | pub author: Vec<Address>
, 118 | pub include_transactions: bool, 119 | pub include_transaction_logs: bool, 120 | pub include_transaction_traces: bool, 121 | pub include_blocks: bool, 122 | } 123 | 124 | #[derive(Deserialize, Serialize, Default, Debug, Clone, Copy)] 125 | #[serde(default)] 126 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 127 | pub struct Fields { 128 | pub block: BlockFields, 129 | pub transaction: TransactionFields, 130 | pub log: LogFields, 131 | pub trace: TraceFields, 132 | } 133 | 134 | impl Fields { 135 | pub fn all() -> Self { 136 | Self { 137 | block: BlockFields::all(), 138 | transaction: TransactionFields::all(), 139 | log: LogFields::all(), 140 | trace: TraceFields::all(), 141 | } 142 | } 143 | } 144 | 145 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 146 | #[serde(default)] 147 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 148 | pub struct BlockFields { 149 | pub number: bool, 150 | pub hash: bool, 151 | pub parent_hash: bool, 152 | pub nonce: bool, 153 | pub sha3_uncles: bool, 154 | pub logs_bloom: bool, 155 | pub transactions_root: bool, 156 | pub state_root: bool, 157 | pub receipts_root: bool, 158 | pub miner: bool, 159 | pub difficulty: bool, 160 | pub total_difficulty: bool, 161 | pub extra_data: bool, 162 | pub size: bool, 163 | pub gas_limit: bool, 164 | pub gas_used: bool, 165 | pub timestamp: bool, 166 | pub uncles: bool, 167 | pub base_fee_per_gas: bool, 168 | pub blob_gas_used: bool, 169 | pub excess_blob_gas: bool, 170 | pub parent_beacon_block_root: bool, 171 | pub withdrawals_root: bool, 172 | pub withdrawals: bool, 173 | pub l1_block_number: bool, 174 | pub send_count: bool, 175 | pub send_root: bool, 176 | pub mix_hash: bool, 177 | } 178 | 179 | impl BlockFields { 180 | pub fn all() -> Self { 181 | BlockFields { 182 | number: true, 183 | hash: true, 184 | parent_hash: true, 185 | nonce: true, 186 | sha3_uncles: true, 187 | logs_bloom: true, 188 | transactions_root: true, 189 | state_root: true, 190 | receipts_root: true, 191 | miner: true, 192 | difficulty: true, 193 | total_difficulty: true, 194 | extra_data: true, 195 | size: true, 196 | gas_limit: true, 197 | gas_used: true, 198 | timestamp: true, 199 | uncles: true, 200 | base_fee_per_gas: true, 201 | blob_gas_used: true, 202 | excess_blob_gas: true, 203 | parent_beacon_block_root: true, 204 | withdrawals_root: true, 205 | withdrawals: true, 206 | l1_block_number: true, 207 | send_count: true, 208 | send_root: true, 209 | mix_hash: true, 210 | } 211 | } 212 | } 213 | 214 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 215 | #[serde(default)] 216 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 217 | pub struct TransactionFields { 218 | pub block_hash: bool, 219 | pub block_number: bool, 220 | #[serde(rename = "from")] 221 | pub from_: bool, 222 | pub gas: bool, 223 | pub gas_price: bool, 224 | pub hash: bool, 225 | pub input: bool, 226 | pub nonce: bool, 227 | pub to: bool, 228 | pub transaction_index: bool, 229 | pub value: bool, 230 | pub v: bool, 231 | pub r: bool, 232 | pub s: bool, 233 | pub max_priority_fee_per_gas: bool, 234 | pub max_fee_per_gas: bool, 235 | pub chain_id: bool, 236 | pub cumulative_gas_used: bool, 237 | pub effective_gas_price: bool, 238 | pub gas_used: bool, 239 | pub contract_address: bool, 240 | pub logs_bloom: bool, 241 | #[serde(rename = "type")] 242 | pub type_: bool, 243 | pub root: bool, 244 | pub status: bool, 245 | pub sighash: bool, 246 | pub y_parity: bool, 247 | pub access_list: bool, 248 | pub l1_fee: 
bool, 249 | pub l1_gas_price: bool, 250 | pub l1_fee_scalar: bool, 251 | pub gas_used_for_l1: bool, 252 | pub max_fee_per_blob_gas: bool, 253 | pub blob_versioned_hashes: bool, 254 | pub deposit_nonce: bool, 255 | pub blob_gas_price: bool, 256 | pub deposit_receipt_version: bool, 257 | pub blob_gas_used: bool, 258 | pub l1_base_fee_scalar: bool, 259 | pub l1_blob_base_fee: bool, 260 | pub l1_blob_base_fee_scalar: bool, 261 | pub l1_block_number: bool, 262 | pub mint: bool, 263 | pub source_hash: bool, 264 | } 265 | 266 | impl TransactionFields { 267 | pub fn all() -> Self { 268 | TransactionFields { 269 | block_hash: true, 270 | block_number: true, 271 | from_: true, 272 | gas: true, 273 | gas_price: true, 274 | hash: true, 275 | input: true, 276 | nonce: true, 277 | to: true, 278 | transaction_index: true, 279 | value: true, 280 | v: true, 281 | r: true, 282 | s: true, 283 | max_priority_fee_per_gas: true, 284 | max_fee_per_gas: true, 285 | chain_id: true, 286 | cumulative_gas_used: true, 287 | effective_gas_price: true, 288 | gas_used: true, 289 | contract_address: true, 290 | logs_bloom: true, 291 | type_: true, 292 | root: true, 293 | status: true, 294 | sighash: true, 295 | y_parity: true, 296 | access_list: true, 297 | l1_fee: true, 298 | l1_gas_price: true, 299 | l1_fee_scalar: true, 300 | gas_used_for_l1: true, 301 | max_fee_per_blob_gas: true, 302 | blob_versioned_hashes: true, 303 | deposit_nonce: true, 304 | blob_gas_price: true, 305 | deposit_receipt_version: true, 306 | blob_gas_used: true, 307 | l1_base_fee_scalar: true, 308 | l1_blob_base_fee: true, 309 | l1_blob_base_fee_scalar: true, 310 | l1_block_number: true, 311 | mint: true, 312 | source_hash: true, 313 | } 314 | } 315 | } 316 | 317 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 318 | #[serde(default)] 319 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 320 | pub struct LogFields { 321 | pub removed: bool, 322 | pub log_index: bool, 323 | pub transaction_index: bool, 324 | pub transaction_hash: bool, 325 | pub block_hash: bool, 326 | pub block_number: bool, 327 | pub address: bool, 328 | pub data: bool, 329 | pub topic0: bool, 330 | pub topic1: bool, 331 | pub topic2: bool, 332 | pub topic3: bool, 333 | } 334 | 335 | impl LogFields { 336 | pub fn all() -> Self { 337 | LogFields { 338 | removed: true, 339 | log_index: true, 340 | transaction_index: true, 341 | transaction_hash: true, 342 | block_hash: true, 343 | block_number: true, 344 | address: true, 345 | data: true, 346 | topic0: true, 347 | topic1: true, 348 | topic2: true, 349 | topic3: true, 350 | } 351 | } 352 | } 353 | 354 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 355 | #[serde(default)] 356 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 357 | pub struct TraceFields { 358 | #[serde(rename = "from")] 359 | pub from_: bool, 360 | pub to: bool, 361 | pub call_type: bool, 362 | pub gas: bool, 363 | pub input: bool, 364 | pub init: bool, 365 | pub value: bool, 366 | pub author: bool, 367 | pub reward_type: bool, 368 | pub block_hash: bool, 369 | pub block_number: bool, 370 | pub address: bool, 371 | pub code: bool, 372 | pub gas_used: bool, 373 | pub output: bool, 374 | pub subtraces: bool, 375 | pub trace_address: bool, 376 | pub transaction_hash: bool, 377 | pub transaction_position: bool, 378 | #[serde(rename = "type")] 379 | pub type_: bool, 380 | pub error: bool, 381 | pub sighash: bool, 382 | pub action_address: bool, 383 | pub balance: bool, 384 | pub refund_address: bool, 385 | } 386 | 387 | 
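// Editor's note: like the other field-selection structs in this module,
// TraceFields pairs per-column bool selectors with an all() constructor;
// since Fields derives Default, a selective query can combine them, e.g.
//
//     let fields = Fields { trace: TraceFields::all(), ..Default::default() };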
impl TraceFields { 388 | pub fn all() -> Self { 389 | TraceFields { 390 | from_: true, 391 | to: true, 392 | call_type: true, 393 | gas: true, 394 | input: true, 395 | init: true, 396 | value: true, 397 | author: true, 398 | reward_type: true, 399 | block_hash: true, 400 | block_number: true, 401 | address: true, 402 | code: true, 403 | gas_used: true, 404 | output: true, 405 | subtraces: true, 406 | trace_address: true, 407 | transaction_hash: true, 408 | transaction_position: true, 409 | type_: true, 410 | error: true, 411 | sighash: true, 412 | action_address: true, 413 | balance: true, 414 | refund_address: true, 415 | } 416 | } 417 | } 418 | -------------------------------------------------------------------------------- /cast/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::manual_div_ceil)] 2 | 3 | use std::sync::Arc; 4 | 5 | use anyhow::{Context, Result}; 6 | use arrow::{ 7 | array::{ 8 | builder, Array, BinaryArray, Decimal256Array, GenericBinaryArray, GenericStringArray, 9 | LargeBinaryArray, OffsetSizeTrait, RecordBatch, 10 | }, 11 | compute::CastOptions, 12 | datatypes::{DataType, Field, Schema}, 13 | }; 14 | 15 | /// Casts columns according to given (column name, target data type) pairs. 16 | /// 17 | /// Returns error if casting a row fails and `allow_cast_fail` is set to `false`. 18 | /// Writes `null` to output if casting a row fails and `allow_cast_fail` is set to `true`. 19 | pub fn cast<S: AsRef<str>>( 20 | map: &[(S, DataType)], 21 | data: &RecordBatch, 22 | allow_cast_fail: bool, 23 | ) -> Result<RecordBatch> { 24 | let schema = cast_schema(map, data.schema_ref()).context("cast schema")?; 25 | 26 | let mut arrays = Vec::with_capacity(data.num_columns()); 27 | 28 | let cast_opt = CastOptions { 29 | safe: allow_cast_fail, 30 | ..Default::default() 31 | }; 32 | 33 | for (col, field) in data.columns().iter().zip(data.schema_ref().fields().iter()) { 34 | let cast_target = map.iter().find(|x| x.0.as_ref() == field.name()); 35 | 36 | let col = match cast_target { 37 | Some(tgt) => { 38 | // allow precision loss for decimal types into floating point types 39 | if matches!( 40 | col.data_type(), 41 | DataType::Decimal256(..) | DataType::Decimal128(..) 42 | ) && tgt.1.is_floating() 43 | { 44 | let string_col = 45 | arrow::compute::cast_with_options(col, &DataType::Utf8, &cast_opt) 46 | .with_context(|| { 47 | format!( 48 | "Failed when casting column '{}' to string as intermediate step", 49 | field.name() 50 | ) 51 | })?; 52 | Arc::new( 53 | arrow::compute::cast_with_options(&string_col, &tgt.1, &cast_opt) 54 | .with_context(|| { 55 | format!( 56 | "Failed when casting column '{}' to {:?}", 57 | field.name(), 58 | tgt.1 59 | ) 60 | })?, 61 | ) 62 | } else { 63 | Arc::new( 64 | arrow::compute::cast_with_options(col, &tgt.1, &cast_opt).with_context( 65 | || { 66 | format!( 67 | "Failed when casting column '{}' from {:?} to {:?}", 68 | field.name(), 69 | col.data_type(), 70 | tgt.1 71 | ) 72 | }, 73 | )?, 74 | ) 75 | } 76 | } 77 | None => col.clone(), 78 | }; 79 | 80 | arrays.push(col); 81 | } 82 | 83 | let batch = RecordBatch::try_new(Arc::new(schema), arrays).context("construct record batch")?; 84 | 85 | Ok(batch) 86 | } 87 |
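// Editor's usage sketch (illustrative, not part of the original source): build a
// one-column batch and cast that column by name. The column name "value" and the
// Int32 -> Int64 mapping are hypothetical; columns not listed in the mapping pass
// through unchanged.
//
// use arrow::array::Int32Array;
//
// let batch = RecordBatch::try_new(
//     Arc::new(Schema::new(vec![Field::new("value", DataType::Int32, true)])),
//     vec![Arc::new(Int32Array::from(vec![1, 2, 3]))],
// )?;
// // allow_cast_fail = false: a row that fails to cast is an error, not a null.
// let casted = cast(&[("value", DataType::Int64)], &batch, false)?;
// assert_eq!(casted.schema_ref().field(0).data_type(), &DataType::Int64);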
88 | /// Casts column types according to given (column name, target data type) pairs. 89 | pub fn cast_schema<S: AsRef<str>>(map: &[(S, DataType)], schema: &Schema) -> Result<Schema> { 90 | let mut fields = schema.fields().to_vec(); 91 | 92 | for f in fields.iter_mut() { 93 | let cast_target = map.iter().find(|x| x.0.as_ref() == f.name()); 94 | 95 | if let Some(tgt) = cast_target { 96 | *f = Arc::new(Field::new(f.name(), tgt.1.clone(), f.is_nullable())); 97 | } 98 | } 99 | 100 | Ok(Schema::new(fields)) 101 | } 102 | 103 | /// Casts all columns with from_type to to_type. 104 | /// 105 | /// Returns error if casting a row fails and `allow_cast_fail` is set to `false`. 106 | /// Writes `null` to output if casting a row fails and `allow_cast_fail` is set to `true`. 107 | pub fn cast_by_type( 108 | data: &RecordBatch, 109 | from_type: &DataType, 110 | to_type: &DataType, 111 | allow_cast_fail: bool, 112 | ) -> Result<RecordBatch> { 113 | let schema = 114 | cast_schema_by_type(data.schema_ref(), from_type, to_type).context("cast schema")?; 115 | 116 | let mut arrays = Vec::with_capacity(data.num_columns()); 117 | 118 | let cast_opt = CastOptions { 119 | safe: allow_cast_fail, 120 | ..Default::default() 121 | }; 122 | 123 | for (col, field) in data.columns().iter().zip(data.schema_ref().fields().iter()) { 124 | let col = if col.data_type() == from_type { 125 | // allow precision loss for decimal types into floating point types 126 | if matches!( 127 | col.data_type(), 128 | DataType::Decimal256(..) | DataType::Decimal128(..) 129 | ) && to_type.is_floating() 130 | { 131 | let string_col = arrow::compute::cast_with_options(col, &DataType::Utf8, &cast_opt) 132 | .with_context(|| { 133 | format!( 134 | "Failed when casting_by_type column '{}' to string as intermediate step", 135 | field.name() 136 | ) 137 | })?; 138 | Arc::new( 139 | arrow::compute::cast_with_options(&string_col, to_type, &cast_opt) 140 | .with_context(|| { 141 | format!( 142 | "Failed when casting_by_type column '{}' to {:?}", 143 | field.name(), 144 | to_type 145 | ) 146 | })?, 147 | ) 148 | } else { 149 | Arc::new( 150 | arrow::compute::cast_with_options(col, to_type, &cast_opt).with_context( 151 | || { 152 | format!( 153 | "Failed when casting_by_type column '{}' to {:?}", 154 | field.name(), 155 | to_type 156 | ) 157 | }, 158 | )?, 159 | ) 160 | } 161 | } else { 162 | col.clone() 163 | }; 164 | 165 | arrays.push(col); 166 | } 167 | 168 | let batch = RecordBatch::try_new(Arc::new(schema), arrays).context("construct record batch")?; 169 | 170 | Ok(batch) 171 | } 172 | 173 | /// Casts columns with from_type to to_type 174 | pub fn cast_schema_by_type( 175 | schema: &Schema, 176 | from_type: &DataType, 177 | to_type: &DataType, 178 | ) -> Result<Schema> { 179 | let mut fields = schema.fields().to_vec(); 180 | 181 | for f in fields.iter_mut() { 182 | if f.data_type() == from_type { 183 | *f = Arc::new(Field::new(f.name(), to_type.clone(), f.is_nullable())); 184 | } 185 | } 186 | 187 | Ok(Schema::new(fields)) 188 | } 189 | 190 | pub fn base58_encode(data: &RecordBatch) -> Result<RecordBatch> { 191 | let schema = schema_binary_to_string(data.schema_ref()); 192 | let mut columns = Vec::<Arc<dyn Array>>::with_capacity(data.columns().len()); 193 | 194 | for col in data.columns().iter() { 195 | if col.data_type() == &DataType::Binary { 196 | columns.push(Arc::new(base58_encode_column( 197 | col.as_any().downcast_ref::<BinaryArray>().unwrap(), 198 | ))); 199 | } else if col.data_type() == &DataType::LargeBinary { 200 | columns.push(Arc::new(base58_encode_column( 201 | col.as_any().downcast_ref::<LargeBinaryArray>().unwrap(), 202 | ))); 203 | } else { 204 | columns.push(col.clone()); 205 | } 206 | } 207 | 208 | RecordBatch::try_new(Arc::new(schema), columns).context("construct arrow batch") 209 | } 210 |
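// Editor's example (illustrative; the field name "pubkey" is hypothetical):
// binary columns become base58 strings, everything else passes through, and the
// schema is rewritten by `schema_binary_to_string` below (Binary -> Utf8,
// LargeBinary -> LargeUtf8).
//
// let batch = RecordBatch::try_new(
//     Arc::new(Schema::new(vec![Field::new("pubkey", DataType::Binary, true)])),
//     vec![Arc::new(BinaryArray::from_vec(vec![&[1u8, 2, 3][..]]))],
// )?;
// let encoded = base58_encode(&batch)?;
// assert_eq!(encoded.schema_ref().field(0).data_type(), &DataType::Utf8);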
211 | pub fn base58_encode_column<O: OffsetSizeTrait>( 212 | col: &GenericBinaryArray<O>, 213 | ) -> GenericStringArray<O> { 214 | let mut arr = builder::GenericStringBuilder::<O>::with_capacity( 215 | col.len(), 216 | (col.value_data().len() + 2) * 2, 217 | ); 218 | 219 | for v in col.iter() { 220 | match v { 221 | Some(v) => { 222 | let v = bs58::encode(v) 223 | .with_alphabet(bs58::Alphabet::BITCOIN) 224 | .into_string(); 225 | arr.append_value(v); 226 | } 227 | None => arr.append_null(), 228 | } 229 | } 230 | 231 | arr.finish() 232 | } 233 | 234 | pub fn hex_encode<const PREFIXED: bool>(data: &RecordBatch) -> Result<RecordBatch> { 235 | let schema = schema_binary_to_string(data.schema_ref()); 236 | let mut columns = Vec::<Arc<dyn Array>>::with_capacity(data.columns().len()); 237 | 238 | for col in data.columns().iter() { 239 | if col.data_type() == &DataType::Binary { 240 | columns.push(Arc::new(hex_encode_column::<PREFIXED, _>( 241 | col.as_any().downcast_ref::<BinaryArray>().unwrap(), 242 | ))); 243 | } else if col.data_type() == &DataType::LargeBinary { 244 | columns.push(Arc::new(hex_encode_column::<PREFIXED, _>( 245 | col.as_any().downcast_ref::<LargeBinaryArray>().unwrap(), 246 | ))); 247 | } else { 248 | columns.push(col.clone()); 249 | } 250 | } 251 | 252 | RecordBatch::try_new(Arc::new(schema), columns).context("construct arrow batch") 253 | } 254 | 255 | pub fn hex_encode_column<const PREFIXED: bool, O: OffsetSizeTrait>( 256 | col: &GenericBinaryArray<O>, 257 | ) -> GenericStringArray<O> { 258 | let mut arr = builder::GenericStringBuilder::<O>::with_capacity( 259 | col.len(), 260 | (col.value_data().len() + 2) * 2, 261 | ); 262 | 263 | for v in col.iter() { 264 | match v { 265 | Some(v) => { 266 | // TODO: avoid allocation here and use a scratch buffer to encode hex into or write to arrow buffer 267 | // directly somehow. 268 | let v = if PREFIXED { 269 | format!("0x{}", faster_hex::hex_string(v)) 270 | } else { 271 | faster_hex::hex_string(v) 272 | }; 273 | 274 | arr.append_value(v); 275 | } 276 | None => arr.append_null(), 277 | } 278 | } 279 | 280 | arr.finish() 281 | } 282 | 283 | /// Converts binary fields to string in the schema 284 | /// 285 | /// Intended to be used with encode hex functions 286 | pub fn schema_binary_to_string(schema: &Schema) -> Schema { 287 | let mut fields = Vec::<Arc<Field>>::with_capacity(schema.fields().len()); 288 | 289 | for f in schema.fields().iter() { 290 | if f.data_type() == &DataType::Binary { 291 | fields.push(Arc::new(Field::new( 292 | f.name().clone(), 293 | DataType::Utf8, 294 | f.is_nullable(), 295 | ))); 296 | } else if f.data_type() == &DataType::LargeBinary { 297 | fields.push(Arc::new(Field::new( 298 | f.name().clone(), 299 | DataType::LargeUtf8, 300 | f.is_nullable(), 301 | ))); 302 | } else { 303 | fields.push(f.clone()); 304 | } 305 | } 306 | 307 | Schema::new(fields) 308 | } 309 | 310 | /// Converts decimal256 fields to binary in the schema 311 | /// 312 | /// Intended to be used with u256_to_binary function 313 | pub fn schema_decimal256_to_binary(schema: &Schema) -> Schema { 314 | let mut fields = Vec::<Arc<Field>>::with_capacity(schema.fields().len()); 315 | 316 | for f in schema.fields().iter() { 317 | if f.data_type() == &DataType::Decimal256(76, 0) { 318 | fields.push(Arc::new(Field::new( 319 | f.name().clone(), 320 | DataType::Binary, 321 | f.is_nullable(), 322 | ))); 323 | } else { 324 | fields.push(f.clone()); 325 | } 326 | } 327 | 328 | Schema::new(fields) 329 | } 330 | 331 | pub fn base58_decode_column<O: OffsetSizeTrait>( 332 | col: &GenericStringArray<O>, 333 | ) -> Result<GenericBinaryArray<O>> { 334 | let mut arr = 335 |
builder::GenericBinaryBuilder::<O>::with_capacity(col.len(), col.value_data().len() / 2); 336 | 337 | for v in col.iter() { 338 | match v { 339 | // TODO: this should be optimized by removing allocations if needed 340 | Some(v) => { 341 | let v = bs58::decode(v) 342 | .with_alphabet(bs58::Alphabet::BITCOIN) 343 | .into_vec() 344 | .context("bs58 decode")?; 345 | arr.append_value(v); 346 | } 347 | None => arr.append_null(), 348 | } 349 | } 350 | 351 | Ok(arr.finish()) 352 | } 353 | 354 | pub fn hex_decode_column<const PREFIXED: bool, O: OffsetSizeTrait>( 355 | col: &GenericStringArray<O>, 356 | ) -> Result<GenericBinaryArray<O>> { 357 | let mut arr = 358 | builder::GenericBinaryBuilder::<O>::with_capacity(col.len(), col.value_data().len() / 2); 359 | 360 | for v in col.iter() { 361 | match v { 362 | // TODO: this should be optimized by removing allocations if needed 363 | Some(v) => { 364 | let v = v.as_bytes(); 365 | let v = if PREFIXED { 366 | v.get(2..).context("index into prefix hex encoded value")? 367 | } else { 368 | v 369 | }; 370 | 371 | let len = v.len(); 372 | let mut dst = vec![0; (len + 1) / 2]; 373 | 374 | faster_hex::hex_decode(v, &mut dst).context("hex decode")?; 375 | 376 | arr.append_value(dst); 377 | } 378 | None => arr.append_null(), 379 | } 380 | } 381 | 382 | Ok(arr.finish()) 383 | } 384 | 385 | pub fn u256_column_from_binary<O: OffsetSizeTrait>( 386 | col: &GenericBinaryArray<O>, 387 | ) -> Result<Decimal256Array> { 388 | let mut arr = builder::Decimal256Builder::with_capacity(col.len()); 389 | 390 | for v in col.iter() { 391 | match v { 392 | Some(v) => { 393 | let num = ruint::aliases::U256::try_from_be_slice(v).context("parse ruint u256")?; 394 | let num = alloy_primitives::I256::try_from(num) 395 | .with_context(|| format!("u256 to i256. val was {}", num))?; 396 | 397 | let val = arrow::datatypes::i256::from_be_bytes(num.to_be_bytes::<32>()); 398 | arr.append_value(val); 399 | } 400 | None => arr.append_null(), 401 | } 402 | } 403 | 404 | Ok(arr.with_precision_and_scale(76, 0).unwrap().finish()) 405 | } 406 | 407 | pub fn u256_column_to_binary(col: &Decimal256Array) -> Result<BinaryArray> { 408 | let mut arr = builder::BinaryBuilder::with_capacity(col.len(), col.len() * 32); 409 | 410 | for v in col.iter() { 411 | match v { 412 | Some(v) => { 413 | let num = alloy_primitives::I256::from_be_bytes::<32>(v.to_be_bytes()); 414 | let num = ruint::aliases::U256::try_from(num).context("convert i256 to u256")?; 415 | arr.append_value(num.to_be_bytes_trimmed_vec()); 416 | } 417 | None => { 418 | arr.append_null(); 419 | } 420 | } 421 | } 422 | 423 | Ok(arr.finish()) 424 | } 425 | 426 | /// Converts all Decimal256 (U256) columns in the batch to big endian binary values 427 | pub fn u256_to_binary(data: &RecordBatch) -> Result<RecordBatch> { 428 | let schema = schema_decimal256_to_binary(data.schema_ref()); 429 | let mut columns = Vec::<Arc<dyn Array>>::with_capacity(data.columns().len()); 430 | 431 | for (i, col) in data.columns().iter().enumerate() { 432 | if col.data_type() == &DataType::Decimal256(76, 0) { 433 | let col = col.as_any().downcast_ref::<Decimal256Array>().unwrap(); 434 | let x = u256_column_to_binary(col) 435 | .with_context(|| format!("col {} to binary", data.schema().fields()[i].name()))?; 436 | columns.push(Arc::new(x)); 437 | } else { 438 | columns.push(col.clone()); 439 | } 440 | } 441 | 442 | RecordBatch::try_new(Arc::new(schema), columns).context("construct arrow batch") 443 | } 444 |
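// Editor's sketch (illustrative): the U256 helpers are meant to be used together.
// `decimal_batch` is an assumed binding holding a batch with Decimal256(76, 0)
// columns, e.g. value columns produced by the EVM schemas. Convert them all to
// trimmed big-endian bytes, then convert a single column back:
//
// let binary_batch = u256_to_binary(&decimal_batch)?;
// let col = binary_batch
//     .column(0)
//     .as_any()
//     .downcast_ref::<BinaryArray>()
//     .unwrap();
// let roundtripped = u256_column_from_binary(col)?; // Decimal256Array again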
445 | #[cfg(test)] 446 | mod tests { 447 | use super::*; 448 | use arrow::datatypes::DataType; 449 | use std::fs::File; 450 | 451 | #[test] 452 | #[ignore] 453 | fn test_cast() { 454 | use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; 455 | 456 | let builder = 457 | ParquetRecordBatchReaderBuilder::try_new(File::open("data.parquet").unwrap()).unwrap(); 458 | let mut reader = builder.build().unwrap(); 459 | let table = reader.next().unwrap().unwrap(); 460 | 461 | let type_mappings = vec![ 462 | ("amount0In", DataType::Decimal128(15, 0)), 463 | ("amount1In", DataType::Float32), 464 | ("amount0Out", DataType::Float64), 465 | ("amount1Out", DataType::Decimal128(38, 0)), 466 | ("timestamp", DataType::Int64), 467 | ]; 468 | 469 | let result = cast(&type_mappings, &table, true).unwrap(); 470 | 471 | // Save the cast result to a new parquet file 472 | let mut file = File::create("result.parquet").unwrap(); 473 | let mut writer = 474 | parquet::arrow::ArrowWriter::try_new(&mut file, result.schema(), None).unwrap(); 475 | writer.write(&result).unwrap(); 476 | writer.close().unwrap(); 477 | } 478 | } 479 | -------------------------------------------------------------------------------- /ingest/src/svm.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Context, Result}; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | #[derive(Default, Debug, Clone)] 5 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 6 | pub struct Query { 7 | pub from_block: u64, 8 | pub to_block: Option<u64>, 9 | pub include_all_blocks: bool, 10 | pub fields: Fields, 11 | pub instructions: Vec<InstructionRequest>, 12 | pub transactions: Vec<TransactionRequest>, 13 | pub logs: Vec<LogRequest>, 14 | pub balances: Vec<BalanceRequest>, 15 | pub token_balances: Vec<TokenBalanceRequest>, 16 | pub rewards: Vec<RewardRequest>, 17 | } 18 | 19 | #[derive(Debug, Clone, Copy)] 20 | pub struct Address(pub [u8; 32]); 21 | 22 | #[derive(Debug, Clone)] 23 | pub struct Data(pub Vec<u8>); 24 | 25 | #[derive(Debug, Clone, Copy)] 26 | pub struct D1(pub [u8; 1]); 27 | 28 | #[derive(Debug, Clone, Copy)] 29 | pub struct D2(pub [u8; 2]); 30 | 31 | #[derive(Debug, Clone, Copy)] 32 | pub struct D3(pub [u8; 3]); 33 | 34 | #[derive(Debug, Clone, Copy)] 35 | pub struct D4(pub [u8; 4]); 36 | 37 | #[derive(Debug, Clone, Copy)] 38 | pub struct D8(pub [u8; 8]); 39 |
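// Editor's sketch (illustrative, not part of the original source): building a query
// that matches instructions of a single program by 8-byte discriminator. The zeroed
// program id and discriminator are placeholders; real values come from the pyo3
// extractors below (base58 for `Address`, hex or bytes for the `D*` wrappers).
//
// fn example_query() -> Query {
//     Query {
//         from_block: 300_000_000,
//         to_block: Some(300_000_100),
//         fields: Fields::all(),
//         instructions: vec![InstructionRequest {
//             program_id: vec![Address([0u8; 32])], // placeholder
//             d8: vec![D8([0u8; 8])],               // placeholder discriminator
//             include_transactions: true,
//             ..Default::default()
//         }],
//         ..Default::default()
//     }
// }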
40 | #[cfg(feature = "pyo3")] 41 | fn extract_base58<const N: usize>(ob: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult<[u8; N]> { 42 | use pyo3::types::PyAnyMethods; 43 | 44 | let s: &str = ob.extract()?; 45 | let mut out = [0; N]; 46 | 47 | bs58::decode(s) 48 | .with_alphabet(bs58::Alphabet::BITCOIN) 49 | .onto(&mut out) 50 | .context("decode base58")?; 51 | 52 | Ok(out) 53 | } 54 | 55 | #[cfg(feature = "pyo3")] 56 | fn extract_data<const N: usize>(ob: &pyo3::Bound<'_, pyo3::PyAny>) -> pyo3::PyResult<[u8; N]> { 57 | use pyo3::types::PyAnyMethods; 58 | use pyo3::types::PyTypeMethods; 59 | 60 | let ob_type: String = ob.get_type().name()?.to_string(); 61 | match ob_type.as_str() { 62 | "str" => { 63 | let s: &str = ob.extract()?; 64 | let out = hex_to_bytes(s).context("failed to decode hex")?; 65 | if out.len() != N { 66 | return Err(anyhow!("expected length {}, got {}", N, out.len()).into()); 67 | } 68 | let out: [u8; N] = out 69 | .try_into() 70 | .map_err(|e| anyhow!("failed to convert to array: {:?}", e))?; 71 | Ok(out) 72 | } 73 | "bytes" => { 74 | let out: Vec<u8> = ob.extract()?; 75 | if out.len() != N { 76 | return Err(anyhow!("expected length {}, got {}", N, out.len()).into()); 77 | } 78 | let out: [u8; N] = out 79 | .try_into() 80 | .map_err(|e| anyhow!("failed to convert to array: {:?}", e))?; 81 | Ok(out) 82 | } 83 | _ => Err(anyhow!("unknown type: {}", ob_type).into()), 84 | } 85 | } 86 | 87 | fn hex_to_bytes(hex_string: &str) -> Result<Vec<u8>> { 88 | let hex_string = hex_string.strip_prefix("0x").unwrap_or(hex_string); 89 | let hex_string = if hex_string.len() % 2 == 1 { 90 | format!("0{}", hex_string) 91 | } else { 92 | hex_string.to_string() 93 | }; 94 | let out = (0..hex_string.len()) 95 | .step_by(2) 96 | .map(|i| { 97 | u8::from_str_radix(&hex_string[i..i + 2], 16) 98 | .context("failed to parse hexstring to bytes") 99 | }) 100 | .collect::<Result<Vec<u8>, _>>()?; 101 | 102 | Ok(out) 103 | } 104 | 105 | #[cfg(feature = "pyo3")] 106 | impl<'py> pyo3::FromPyObject<'py> for Address { 107 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 108 | let out = extract_base58(ob)?; 109 | Ok(Self(out)) 110 | } 111 | } 112 | 113 | #[cfg(feature = "pyo3")] 114 | impl<'py> pyo3::FromPyObject<'py> for Data { 115 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 116 | use pyo3::types::PyAnyMethods; 117 | use pyo3::types::PyTypeMethods; 118 | 119 | let ob_type: String = ob.get_type().name()?.to_string(); 120 | match ob_type.as_str() { 121 | "str" => { 122 | let s: &str = ob.extract()?; 123 | let out = hex_to_bytes(s).context("failed to decode hex")?; 124 | Ok(Self(out)) 125 | } 126 | "bytes" => { 127 | let out: Vec<u8> = ob.extract()?; 128 | Ok(Self(out)) 129 | } 130 | _ => Err(anyhow!("unknown type: {}", ob_type).into()), 131 | } 132 | } 133 | } 134 | 135 | #[cfg(feature = "pyo3")] 136 | impl<'py> pyo3::FromPyObject<'py> for D1 { 137 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 138 | let out = extract_data(ob)?; 139 | Ok(Self(out)) 140 | } 141 | } 142 | 143 | #[cfg(feature = "pyo3")] 144 | impl<'py> pyo3::FromPyObject<'py> for D2 { 145 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 146 | let out = extract_data(ob)?; 147 | Ok(Self(out)) 148 | } 149 | } 150 | 151 | #[cfg(feature = "pyo3")] 152 | impl<'py> pyo3::FromPyObject<'py> for D3 { 153 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 154 | let out = extract_data(ob)?; 155 | Ok(Self(out)) 156 | } 157 | } 158 | 159 | #[cfg(feature = "pyo3")] 160 | impl<'py> pyo3::FromPyObject<'py> for D4 { 161 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 162 | let out = extract_data(ob)?; 163 | Ok(Self(out)) 164 | } 165 | } 166 | 167 | #[cfg(feature = "pyo3")] 168 | impl<'py> pyo3::FromPyObject<'py> for D8 { 169 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 170 | let out = extract_data(ob)?; 171 | Ok(Self(out)) 172 | } 173 | } 174 | 175 | #[derive(Default, Debug, Clone)] 176 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 177 | pub struct InstructionRequest { 178 | pub program_id: Vec<Address>
, 179 | pub discriminator: Vec<Data>, 180 | pub d1: Vec<D1>, 181 | pub d2: Vec<D2>, 182 | pub d3: Vec<D3>, 183 | pub d4: Vec<D4>, 184 | pub d8: Vec<D8>, 185 | pub a0: Vec<Address>
, 186 | pub a1: Vec<Address>
, 187 | pub a2: Vec<Address>
, 188 | pub a3: Vec<Address>
, 189 | pub a4: Vec<Address>
, 190 | pub a5: Vec<Address>
, 191 | pub a6: Vec<Address>
, 192 | pub a7: Vec<Address>
, 193 | pub a8: Vec<Address>
, 194 | pub a9: Vec<Address>
, 195 | pub is_committed: bool, 196 | pub include_transactions: bool, 197 | pub include_transaction_token_balances: bool, 198 | pub include_logs: bool, 199 | pub include_inner_instructions: bool, 200 | pub include_blocks: bool, 201 | } 202 | 203 | #[derive(Default, Debug, Clone)] 204 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 205 | pub struct TransactionRequest { 206 | pub fee_payer: Vec<Address>
, 207 | pub include_instructions: bool, 208 | pub include_logs: bool, 209 | pub include_blocks: bool, 210 | } 211 | 212 | #[derive(Default, Debug, Clone)] 213 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 214 | pub struct LogRequest { 215 | pub program_id: Vec<Address>
, 216 | pub kind: Vec<LogKind>, 217 | pub include_transactions: bool, 218 | pub include_instructions: bool, 219 | pub include_blocks: bool, 220 | } 221 | 222 | #[derive(Debug, Clone, Copy)] 223 | pub enum LogKind { 224 | Log, 225 | Data, 226 | Other, 227 | } 228 | 229 | impl LogKind { 230 | pub fn as_str(&self) -> &str { 231 | match self { 232 | Self::Log => "log", 233 | Self::Data => "data", 234 | Self::Other => "other", 235 | } 236 | } 237 | 238 | pub fn from_str(s: &str) -> Result<Self> { 239 | match s { 240 | "log" => Ok(Self::Log), 241 | "data" => Ok(Self::Data), 242 | "other" => Ok(Self::Other), 243 | _ => Err(anyhow!("unknown log kind: {}", s)), 244 | } 245 | } 246 | } 247 | 248 | #[cfg(feature = "pyo3")] 249 | impl<'py> pyo3::FromPyObject<'py> for LogKind { 250 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 251 | use pyo3::types::PyAnyMethods; 252 | 253 | let s: &str = ob.extract().context("extract string")?; 254 | 255 | Ok(Self::from_str(s).context("from str")?) 256 | } 257 | } 258 | 259 | #[derive(Default, Debug, Clone)] 260 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 261 | pub struct BalanceRequest { 262 | pub account: Vec<Address>
, 263 | pub include_transactions: bool, 264 | pub include_transaction_instructions: bool, 265 | pub include_blocks: bool, 266 | } 267 | 268 | #[derive(Default, Debug, Clone)] 269 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 270 | pub struct TokenBalanceRequest { 271 | pub account: Vec<Address>
, 272 | pub pre_program_id: Vec<Address>
, 273 | pub post_program_id: Vec<Address>
, 274 | pub pre_mint: Vec<Address>
, 275 | pub post_mint: Vec<Address>
, 276 | pub pre_owner: Vec<Address>
, 277 | pub post_owner: Vec<Address>
, 278 | pub include_transactions: bool, 279 | pub include_transaction_instructions: bool, 280 | pub include_blocks: bool, 281 | } 282 | 283 | #[derive(Default, Debug, Clone)] 284 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 285 | pub struct RewardRequest { 286 | pub pubkey: Vec<Address>
, 287 | pub include_blocks: bool, 288 | } 289 | 290 | #[derive(Deserialize, Serialize, Default, Debug, Clone, Copy)] 291 | #[serde(default)] 292 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 293 | pub struct Fields { 294 | pub instruction: InstructionFields, 295 | pub transaction: TransactionFields, 296 | pub log: LogFields, 297 | pub balance: BalanceFields, 298 | pub token_balance: TokenBalanceFields, 299 | pub reward: RewardFields, 300 | pub block: BlockFields, 301 | } 302 | 303 | impl Fields { 304 | pub fn all() -> Self { 305 | Self { 306 | instruction: InstructionFields::all(), 307 | transaction: TransactionFields::all(), 308 | log: LogFields::all(), 309 | balance: BalanceFields::all(), 310 | token_balance: TokenBalanceFields::all(), 311 | reward: RewardFields::all(), 312 | block: BlockFields::all(), 313 | } 314 | } 315 | } 316 | 317 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 318 | #[serde(default)] 319 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 320 | pub struct InstructionFields { 321 | pub block_slot: bool, 322 | pub block_hash: bool, 323 | pub transaction_index: bool, 324 | pub instruction_address: bool, 325 | pub program_id: bool, 326 | pub a0: bool, 327 | pub a1: bool, 328 | pub a2: bool, 329 | pub a3: bool, 330 | pub a4: bool, 331 | pub a5: bool, 332 | pub a6: bool, 333 | pub a7: bool, 334 | pub a8: bool, 335 | pub a9: bool, 336 | pub rest_of_accounts: bool, 337 | pub data: bool, 338 | pub d1: bool, 339 | pub d2: bool, 340 | pub d4: bool, 341 | pub d8: bool, 342 | pub error: bool, 343 | pub compute_units_consumed: bool, 344 | pub is_committed: bool, 345 | pub has_dropped_log_messages: bool, 346 | } 347 | 348 | impl InstructionFields { 349 | pub fn all() -> Self { 350 | InstructionFields { 351 | block_slot: true, 352 | block_hash: true, 353 | transaction_index: true, 354 | instruction_address: true, 355 | program_id: true, 356 | a0: true, 357 | a1: true, 358 | a2: true, 359 | a3: true, 360 | a4: true, 361 | a5: true, 362 | a6: true, 363 | a7: true, 364 | a8: true, 365 | a9: true, 366 | rest_of_accounts: true, 367 | data: true, 368 | d1: true, 369 | d2: true, 370 | d4: true, 371 | d8: true, 372 | error: true, 373 | compute_units_consumed: true, 374 | is_committed: true, 375 | has_dropped_log_messages: true, 376 | } 377 | } 378 | } 379 | 380 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 381 | #[serde(default)] 382 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 383 | pub struct TransactionFields { 384 | pub block_slot: bool, 385 | pub block_hash: bool, 386 | pub transaction_index: bool, 387 | pub signature: bool, 388 | pub version: bool, 389 | pub account_keys: bool, 390 | pub address_table_lookups: bool, 391 | pub num_readonly_signed_accounts: bool, 392 | pub num_readonly_unsigned_accounts: bool, 393 | pub num_required_signatures: bool, 394 | pub recent_blockhash: bool, 395 | pub signatures: bool, 396 | pub err: bool, 397 | pub fee: bool, 398 | pub compute_units_consumed: bool, 399 | pub loaded_readonly_addresses: bool, 400 | pub loaded_writable_addresses: bool, 401 | pub fee_payer: bool, 402 | pub has_dropped_log_messages: bool, 403 | } 404 | 405 | impl TransactionFields { 406 | pub fn all() -> Self { 407 | TransactionFields { 408 | block_slot: true, 409 | block_hash: true, 410 | transaction_index: true, 411 | signature: true, 412 | version: true, 413 | account_keys: true, 414 | address_table_lookups: true, 415 | num_readonly_signed_accounts: true, 416 | num_readonly_unsigned_accounts: true, 417 | 
num_required_signatures: true, 418 | recent_blockhash: true, 419 | signatures: true, 420 | err: true, 421 | fee: true, 422 | compute_units_consumed: true, 423 | loaded_readonly_addresses: true, 424 | loaded_writable_addresses: true, 425 | fee_payer: true, 426 | has_dropped_log_messages: true, 427 | } 428 | } 429 | } 430 | 431 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 432 | #[serde(default)] 433 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 434 | pub struct LogFields { 435 | pub block_slot: bool, 436 | pub block_hash: bool, 437 | pub transaction_index: bool, 438 | pub log_index: bool, 439 | pub instruction_address: bool, 440 | pub program_id: bool, 441 | pub kind: bool, 442 | pub message: bool, 443 | } 444 | 445 | impl LogFields { 446 | pub fn all() -> Self { 447 | LogFields { 448 | block_slot: true, 449 | block_hash: true, 450 | transaction_index: true, 451 | log_index: true, 452 | instruction_address: true, 453 | program_id: true, 454 | kind: true, 455 | message: true, 456 | } 457 | } 458 | } 459 | 460 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 461 | #[serde(default)] 462 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 463 | pub struct BalanceFields { 464 | pub block_slot: bool, 465 | pub block_hash: bool, 466 | pub transaction_index: bool, 467 | pub account: bool, 468 | pub pre: bool, 469 | pub post: bool, 470 | } 471 | 472 | impl BalanceFields { 473 | pub fn all() -> Self { 474 | BalanceFields { 475 | block_slot: true, 476 | block_hash: true, 477 | transaction_index: true, 478 | account: true, 479 | pre: true, 480 | post: true, 481 | } 482 | } 483 | } 484 | 485 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 486 | #[serde(default)] 487 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 488 | pub struct TokenBalanceFields { 489 | pub block_slot: bool, 490 | pub block_hash: bool, 491 | pub transaction_index: bool, 492 | pub account: bool, 493 | pub pre_mint: bool, 494 | pub post_mint: bool, 495 | pub pre_decimals: bool, 496 | pub post_decimals: bool, 497 | pub pre_program_id: bool, 498 | pub post_program_id: bool, 499 | pub pre_owner: bool, 500 | pub post_owner: bool, 501 | pub pre_amount: bool, 502 | pub post_amount: bool, 503 | } 504 | 505 | impl TokenBalanceFields { 506 | pub fn all() -> Self { 507 | TokenBalanceFields { 508 | block_slot: true, 509 | block_hash: true, 510 | transaction_index: true, 511 | account: true, 512 | pre_mint: true, 513 | post_mint: true, 514 | pre_decimals: true, 515 | post_decimals: true, 516 | pre_program_id: true, 517 | post_program_id: true, 518 | pre_owner: true, 519 | post_owner: true, 520 | pre_amount: true, 521 | post_amount: true, 522 | } 523 | } 524 | } 525 | 526 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 527 | #[serde(default)] 528 | #[cfg_attr(feature = "pyo3", derive(pyo3::FromPyObject))] 529 | pub struct RewardFields { 530 | pub block_slot: bool, 531 | pub block_hash: bool, 532 | pub pubkey: bool, 533 | pub lamports: bool, 534 | pub post_balance: bool, 535 | pub reward_type: bool, 536 | pub commission: bool, 537 | } 538 | 539 | impl RewardFields { 540 | pub fn all() -> Self { 541 | RewardFields { 542 | block_slot: true, 543 | block_hash: true, 544 | pubkey: true, 545 | lamports: true, 546 | post_balance: true, 547 | reward_type: true, 548 | commission: true, 549 | } 550 | } 551 | } 552 | 553 | #[derive(Default, Debug, Clone, Copy, Serialize, Deserialize)] 554 | #[serde(default)] 555 | #[cfg_attr(feature = "pyo3", 
derive(pyo3::FromPyObject))] 556 | pub struct BlockFields { 557 | pub slot: bool, 558 | pub hash: bool, 559 | pub parent_slot: bool, 560 | pub parent_hash: bool, 561 | pub height: bool, 562 | pub timestamp: bool, 563 | } 564 | 565 | impl BlockFields { 566 | pub fn all() -> Self { 567 | BlockFields { 568 | slot: true, 569 | hash: true, 570 | parent_slot: true, 571 | parent_hash: true, 572 | height: true, 573 | timestamp: true, 574 | } 575 | } 576 | } 577 | -------------------------------------------------------------------------------- /svm-decode/src/deserialize.rs: -------------------------------------------------------------------------------- 1 | use anyhow::{anyhow, Context, Result}; 2 | 3 | /// Represents a parameter input with a name and dynamic type 4 | #[derive(Debug, Clone)] 5 | pub struct ParamInput { 6 | pub name: String, 7 | pub param_type: DynType, 8 | } 9 | 10 | #[cfg(feature = "pyo3")] 11 | impl<'py> pyo3::FromPyObject<'py> for ParamInput { 12 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 13 | use pyo3::types::PyAnyMethods; 14 | 15 | let name = ob.getattr("name")?.extract::<String>()?; 16 | let param_type = ob.getattr("param_type")?.extract::<DynType>()?; 17 | Ok(ParamInput { name, param_type }) 18 | } 19 | } 20 | 21 | /// Represents a dynamic type that can be deserialized from binary data 22 | #[derive(Debug, Clone, PartialEq)] 23 | pub enum DynType { 24 | I8, 25 | I16, 26 | I32, 27 | I64, 28 | I128, 29 | U8, 30 | U16, 31 | U32, 32 | U64, 33 | U128, 34 | Bool, 35 | /// Complex types 36 | FixedArray(Box<DynType>, usize), 37 | Array(Box<DynType>), 38 | Struct(Vec<(String, DynType)>), 39 | Enum(Vec<(String, Option<DynType>)>), 40 | Option(Box<DynType>), 41 | } 42 | 43 | #[cfg(feature = "pyo3")] 44 | impl<'py> pyo3::FromPyObject<'py> for DynType { 45 | fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> { 46 | use pyo3::types::PyAnyMethods; 47 | use pyo3::types::PyTypeMethods; 48 | 49 | let variant_str: String = ob.get_type().name()?.to_string(); 50 | // If the type name is str, it means it's a custom type, and we need to get the actual DynType value 51 | let variant_str = if variant_str == "str" { 52 | ob.to_string() 53 | } else { 54 | variant_str 55 | }; 56 | 57 | match variant_str.as_str() { 58 | "i8" => Ok(DynType::I8), 59 | "i16" => Ok(DynType::I16), 60 | "i32" => Ok(DynType::I32), 61 | "i64" => Ok(DynType::I64), 62 | "i128" => Ok(DynType::I128), 63 | "u8" => Ok(DynType::U8), 64 | "u16" => Ok(DynType::U16), 65 | "u32" => Ok(DynType::U32), 66 | "u64" => Ok(DynType::U64), 67 | "u128" => Ok(DynType::U128), 68 | "bool" => Ok(DynType::Bool), 69 | "FixedArray" => { 70 | let inner_bound = ob 71 | .getattr("element_type") 72 | .context("Failed to retrieve FixedArray element type")?; 73 | let size: usize = ob 74 | .getattr("size") 75 | .context("Failed to retrieve size")? 76 | .extract::<usize>()?; 77 | let inner_type = inner_bound.extract::<DynType>()?; 78 | Ok(DynType::FixedArray(Box::new(inner_type), size)) 79 | } 80 | "Array" => { 81 | let inner_bound = ob 82 | .getattr("element_type") 83 | .context("Failed to retrieve Array element type")?; 84 | let inner_type = inner_bound.extract::<DynType>()?; 85 | Ok(DynType::Array(Box::new(inner_type))) 86 | } 87 | "Struct" => { 88 | let py_fields = ob 89 | .getattr("fields") 90 | .context("Failed to retrieve Struct fields")?; 91 | let mut fields: Vec<(String, DynType)> = Vec::new(); 92 | for field in py_fields.try_iter()? { 93 | match field { 94 | Ok(field) => { 95 | let name = field 96 | .getattr("name") 97 | .context("Failed to retrieve Struct field name")?
98 | .to_string(); 99 | let param_type = field 100 | .getattr("element_type") 101 | .context("Failed to retrieve Struct field type")? 102 | .extract::<DynType>()?; 103 | fields.push((name, param_type)); 104 | } 105 | Err(e) => { 106 | return Err(anyhow!( 107 | "Could not convert Struct fields into an iterator. Error: {:?}", 108 | e 109 | ) 110 | .into()) 111 | } 112 | } 113 | } 114 | Ok(DynType::Struct(fields)) 115 | } 116 | "Enum" => { 117 | let py_variants = ob 118 | .getattr("variants") 119 | .context("Failed to retrieve Enum variants")?; 120 | let mut variants: Vec<(String, Option<DynType>)> = Vec::new(); 121 | for variant in py_variants.try_iter()? { 122 | match variant { 123 | Ok(variant) => { 124 | let name = variant 125 | .getattr("name") 126 | .context("Failed to retrieve Enum variant name")? 127 | .to_string(); 128 | let param_type = variant 129 | .getattr("element_type") 130 | .context("Failed to retrieve Enum variant type")?; 131 | match param_type.to_string().as_str() { 132 | "None" => variants.push((name, None)), 133 | _ => { 134 | let param_type = param_type.extract::<DynType>()?; 135 | variants.push((name, Some(param_type))); 136 | } 137 | } 138 | } 139 | Err(e) => { 140 | return Err(anyhow!( 141 | "Could not convert Enum variants into an iterator. Error: {:?}", 142 | e 143 | ) 144 | .into()) 145 | } 146 | } 147 | } 148 | Ok(DynType::Enum(variants)) 149 | } 150 | "Option" => { 151 | let inner_bound = ob 152 | .getattr("element_type") 153 | .context("Failed to retrieve Option element type")?; 154 | let inner_type = inner_bound.extract::<DynType>()?; 155 | Ok(DynType::Option(Box::new(inner_type))) 156 | } 157 | _ => Err(anyhow!("Not yet implemented type: {}", variant_str).into()), 158 | } 159 | } 160 | } 161 | 162 | /// Represents a dynamically deserialized value 163 | #[derive(Debug, Clone)] 164 | pub enum DynValue { 165 | I8(i8), 166 | I16(i16), 167 | I32(i32), 168 | I64(i64), 169 | I128(i128), 170 | U8(u8), 171 | U16(u16), 172 | U32(u32), 173 | U64(u64), 174 | U128(u128), 175 | Bool(bool), 176 | /// Complex values 177 | Array(Vec<DynValue>), 178 | Struct(Vec<(String, DynValue)>), 179 | Enum(String, Option<Box<DynValue>>), 180 | Option(Option<Box<DynValue>>), 181 | } 182 | 183 | /// Deserializes binary data into a vector of dynamic values based on the provided parameter types 184 | /// 185 | /// # Arguments 186 | /// * `data` - The binary data to deserialize 187 | /// * `params` - The parameter types that define the structure of the data 188 | /// * `error_on_remaining` - Whether to error if there is remaining data in the buffer after parsing 189 | ///   given params.
190 | /// 191 | /// # Returns 192 | /// A vector of deserialized values matching the parameter types 193 | /// 194 | /// # Errors 195 | /// Returns an error if: 196 | /// * `error_on_remaining` is `true` and there is not enough data to deserialize all parameters 197 | /// * The data format doesn't match the expected parameter types 198 | /// * There is remaining data after deserializing all parameters 199 | pub fn deserialize_data( 200 | data: &[u8], 201 | params: &[ParamInput], 202 | error_on_remaining: bool, 203 | ) -> Result> { 204 | let mut ix_values = Vec::with_capacity(params.len()); 205 | let mut remaining_data = data; 206 | 207 | for param in params { 208 | // Deserialize value based on type 209 | let (value, new_data) = deserialize_value(¶m.param_type, remaining_data)?; 210 | ix_values.push(value); 211 | remaining_data = new_data; 212 | } 213 | 214 | if error_on_remaining && !remaining_data.is_empty() { 215 | return Err(anyhow!( 216 | "Remaining data after deserialization: {:?}", 217 | remaining_data 218 | )); 219 | } 220 | 221 | Ok(ix_values) 222 | } 223 | 224 | /// Deserializes a single value of the specified type from binary data 225 | /// 226 | /// # Arguments 227 | /// * `param_type` - The type of value to deserialize 228 | /// * `data` - The binary data to deserialize from 229 | /// 230 | /// # Returns 231 | /// A tuple containing: 232 | /// * The deserialized value 233 | /// * The remaining data after deserialization 234 | /// 235 | /// # Errors 236 | /// Returns an error if: 237 | /// * There is not enough data to deserialize the value 238 | /// * The data format doesn't match the expected type 239 | fn deserialize_value<'a>(param_type: &DynType, data: &'a [u8]) -> Result<(DynValue, &'a [u8])> { 240 | match param_type { 241 | DynType::Option(inner_type) => { 242 | let value = data.first().context("Not enough data for option")?; 243 | match value { 244 | 0 => Ok((DynValue::Option(None), &data[1..])), 245 | 1 => { 246 | let (value, new_data) = deserialize_value(inner_type, &data[1..])?; 247 | Ok((DynValue::Option(Some(Box::new(value))), new_data)) 248 | } 249 | _ => Err(anyhow!("Invalid option value: {}", value)), 250 | } 251 | } 252 | DynType::I8 => { 253 | if data.is_empty() { 254 | return Err(anyhow!( 255 | "Not enough data for i8: expected 1 byte, got {}", 256 | data.len() 257 | )); 258 | } 259 | let value = i8::from_le_bytes(data[..1].try_into().unwrap()); 260 | Ok((DynValue::I8(value), &data[1..])) 261 | } 262 | DynType::I16 => { 263 | if data.len() < 2 { 264 | return Err(anyhow!( 265 | "Not enough data for i16: expected 2 bytes, got {}", 266 | data.len() 267 | )); 268 | } 269 | let value = i16::from_le_bytes(data[..2].try_into().unwrap()); 270 | Ok((DynValue::I16(value), &data[2..])) 271 | } 272 | DynType::I32 => { 273 | if data.len() < 4 { 274 | return Err(anyhow!( 275 | "Not enough data for i32: expected 4 bytes, got {}", 276 | data.len() 277 | )); 278 | } 279 | let value = i32::from_le_bytes(data[..4].try_into().unwrap()); 280 | Ok((DynValue::I32(value), &data[4..])) 281 | } 282 | DynType::I64 => { 283 | if data.len() < 8 { 284 | return Err(anyhow!( 285 | "Not enough data for i64: expected 8 bytes, got {}", 286 | data.len() 287 | )); 288 | } 289 | let value = i64::from_le_bytes(data[..8].try_into().unwrap()); 290 | Ok((DynValue::I64(value), &data[8..])) 291 | } 292 | DynType::I128 => { 293 | if data.len() < 16 { 294 | return Err(anyhow!( 295 | "Not enough data for i128: expected 16 bytes, got {}", 296 | data.len() 297 | )); 298 | } 299 | let value = 
i128::from_le_bytes(data[..16].try_into().unwrap()); 300 | Ok((DynValue::I128(value), &data[16..])) 301 | } 302 | DynType::U8 => { 303 | if data.is_empty() { 304 | return Err(anyhow!("Not enough data for u8: expected 1 byte, got 0")); 305 | } 306 | let value = data[0]; 307 | Ok((DynValue::U8(value), &data[1..])) 308 | } 309 | DynType::U16 => { 310 | if data.len() < 2 { 311 | return Err(anyhow!( 312 | "Not enough data for u16: expected 2 bytes, got {}", 313 | data.len() 314 | )); 315 | } 316 | let value = u16::from_le_bytes(data[..2].try_into().unwrap()); 317 | Ok((DynValue::U16(value), &data[2..])) 318 | } 319 | DynType::U32 => { 320 | if data.len() < 4 { 321 | return Err(anyhow!( 322 | "Not enough data for u32: expected 4 bytes, got {}", 323 | data.len() 324 | )); 325 | } 326 | let value = u32::from_le_bytes(data[..4].try_into().unwrap()); 327 | Ok((DynValue::U32(value), &data[4..])) 328 | } 329 | DynType::U64 => { 330 | if data.len() < 8 { 331 | return Err(anyhow!( 332 | "Not enough data for u64: expected 8 bytes, got {}", 333 | data.len() 334 | )); 335 | } 336 | let value = u64::from_le_bytes(data[..8].try_into().unwrap()); 337 | Ok((DynValue::U64(value), &data[8..])) 338 | } 339 | DynType::U128 => { 340 | if data.len() < 16 { 341 | return Err(anyhow!( 342 | "Not enough data for u128: expected 16 bytes, got {}", 343 | data.len() 344 | )); 345 | } 346 | let value = u128::from_le_bytes(data[..16].try_into().unwrap()); 347 | Ok((DynValue::U128(value), &data[16..])) 348 | } 349 | DynType::Bool => { 350 | if data.is_empty() { 351 | return Err(anyhow!("Not enough data for bool: expected 1 byte, got 0")); 352 | } 353 | let value = data[0] != 0; 354 | Ok((DynValue::Bool(value), &data[1..])) 355 | } 356 | DynType::FixedArray(inner_type, size) => { 357 | let inner_type_size = check_type_size(inner_type)?; 358 | let total_size = inner_type_size * size; 359 | 360 | if data.len() < total_size { 361 | return Err(anyhow!( 362 | "Not enough data for fixed array: expected {} bytes, got {}", 363 | total_size, 364 | data.len() 365 | )); 366 | } 367 | let value = data[..total_size] 368 | .to_vec() 369 | .chunks(inner_type_size) 370 | .map(|chunk| { 371 | let (value, _) = deserialize_value(inner_type, chunk)?; 372 | Ok(value) 373 | }) 374 | .collect::<Result<Vec<DynValue>>>()?; 375 | Ok((DynValue::Array(value), &data[total_size..])) 376 | } 377 | DynType::Array(inner_type) => { 378 | if data.len() < 4 { 379 | return Err(anyhow!( 380 | "Not enough data for vector length: expected 4 bytes, got {}", 381 | data.len() 382 | )); 383 | } 384 | let length = u32::from_le_bytes(data[..4].try_into().unwrap()) as usize; 385 | let mut remaining_data = &data[4..]; 386 | 387 | let mut values = Vec::with_capacity(length); 388 | for _ in 0..length { 389 | let (value, new_data) = deserialize_value(inner_type, remaining_data)?; 390 | values.push(value); 391 | remaining_data = new_data; 392 | } 393 | 394 | Ok((DynValue::Array(values), remaining_data)) 395 | } 396 | DynType::Struct(fields) => { 397 | let mut values = Vec::new(); 398 | let mut remaining_data = data; 399 | for field in fields { 400 | let (value, new_data) = deserialize_value(&field.1, remaining_data)?; 401 | values.push((field.0.clone(), value)); 402 | remaining_data = new_data; 403 | } 404 | Ok((DynValue::Struct(values), remaining_data)) 405 | } 406 | DynType::Enum(variants) => { 407 | if data.is_empty() { 408 | return Err(anyhow!( 409 | "Not enough data for enum: expected at least 1 byte for variant index" 410 | )); 411 | } 412 | let variant_index = data[0] as usize; 413 | let remaining_data = &data[1..]; 414 | 415 | if variant_index >= variants.len() { 416 | return Err(anyhow!("Invalid enum variant index: {}", variant_index)); 417 | } 418 | 419 | let (variant_name, variant_type) = &variants[variant_index]; 420 | 421 | if let Some(variant_type) = variant_type { 422 | let (variant_value, new_data) = deserialize_value(variant_type, remaining_data)?; 423 | Ok(( 424 | DynValue::Enum(variant_name.clone(), Some(Box::new(variant_value))), 425 | new_data, 426 | )) 427 | } else { 428 | Ok((DynValue::Enum(variant_name.clone(), None), remaining_data)) 429 | } 430 | } 431 | } 432 | } 433 |
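// Worked example (editor's sketch; the field names are hypothetical): a Borsh-style
// payload holding a little-endian u16 followed by an Option<u8>.
//
// let params = vec![
//     ParamInput { name: "amount".to_string(), param_type: DynType::U16 },
//     ParamInput { name: "bump".to_string(), param_type: DynType::Option(Box::new(DynType::U8)) },
// ];
// // bytes: 2a 00 -> amount = 42, then 01 07 -> Some(7)
// let values = deserialize_data(&[0x2a, 0x00, 0x01, 0x07], &params, true)?;
// assert!(matches!(values[0], DynValue::U16(42)));
// assert!(matches!(values[1], DynValue::Option(Some(_))));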
434 | fn check_type_size(param_type: &DynType) -> Result<usize> { 435 | match param_type { 436 | DynType::U8 => Ok(1), 437 | DynType::U16 => Ok(2), 438 | DynType::U32 => Ok(4), 439 | DynType::U64 => Ok(8), 440 | DynType::U128 => Ok(16), 441 | DynType::I8 => Ok(1), 442 | DynType::I16 => Ok(2), 443 | DynType::I32 => Ok(4), 444 | DynType::I64 => Ok(8), 445 | DynType::I128 => Ok(16), 446 | DynType::Bool => Ok(1), 447 | _ => Err(anyhow!("Unsupported primitive type for fixed array")), 448 | } 449 | } 450 | -------------------------------------------------------------------------------- /svm-schema/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use arrow::array::builder; 4 | use arrow::datatypes::{DataType, Field, Fields, Schema}; 5 | use arrow::record_batch::RecordBatch; 6 | 7 | pub fn blocks_schema() -> Schema { 8 | Schema::new(vec![ 9 | Field::new("slot", DataType::UInt64, true), 10 | Field::new("hash", DataType::Binary, true), 11 | Field::new("parent_slot", DataType::UInt64, true), 12 | Field::new("parent_hash", DataType::Binary, true), 13 | Field::new("height", DataType::UInt64, true), 14 | Field::new("timestamp", DataType::Int64, true), 15 | ]) 16 | } 17 | 18 | pub fn rewards_schema() -> Schema { 19 | Schema::new(vec![ 20 | Field::new("block_slot", DataType::UInt64, true), 21 | Field::new("block_hash", DataType::Binary, true), 22 | Field::new("pubkey", DataType::Binary, true), 23 | Field::new("lamports", DataType::Int64, true), 24 | Field::new("post_balance", DataType::UInt64, true), 25 | Field::new("reward_type", DataType::Utf8, true), 26 | Field::new("commission", DataType::UInt8, true), 27 | ]) 28 | } 29 | 30 | pub fn token_balances_schema() -> Schema { 31 | Schema::new(vec![ 32 | Field::new("block_slot", DataType::UInt64, true), 33 | Field::new("block_hash", DataType::Binary, true), 34 | Field::new("transaction_index", DataType::UInt32, true), 35 | Field::new("account", DataType::Binary, true), 36 | Field::new("pre_mint", DataType::Binary, true), 37 | Field::new("post_mint", DataType::Binary, true), 38 | Field::new("pre_decimals", DataType::UInt16, true), 39 | Field::new("post_decimals", DataType::UInt16, true), 40 | Field::new("pre_program_id", DataType::Binary, true), 41 | Field::new("post_program_id", DataType::Binary, true), 42 | Field::new("pre_owner", DataType::Binary, true), 43 | Field::new("post_owner", DataType::Binary, true), 44 | Field::new("pre_amount", DataType::UInt64, true), 45 | Field::new("post_amount", DataType::UInt64, true), 46 | ]) 47 | } 48 | 49 | pub fn balances_schema() -> Schema { 50 | Schema::new(vec![ 51 | Field::new("block_slot", DataType::UInt64, true), 52 | Field::new("block_hash", DataType::Binary, true), 53 | Field::new("transaction_index", DataType::UInt32, true), 54 | Field::new("account", DataType::Binary, true), 55 | Field::new("pre", DataType::UInt64, true), 56 |
Field::new("post", DataType::UInt64, true), 57 | ]) 58 | } 59 | 60 | pub fn logs_schema() -> Schema { 61 | Schema::new(vec![ 62 | Field::new("block_slot", DataType::UInt64, true), 63 | Field::new("block_hash", DataType::Binary, true), 64 | Field::new("transaction_index", DataType::UInt32, true), 65 | Field::new("log_index", DataType::UInt32, true), 66 | Field::new( 67 | "instruction_address", 68 | DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))), 69 | true, 70 | ), 71 | Field::new("program_id", DataType::Binary, true), 72 | Field::new("kind", DataType::Utf8, true), 73 | Field::new("message", DataType::Utf8, true), 74 | ]) 75 | } 76 | 77 | pub fn transactions_schema() -> Schema { 78 | Schema::new(vec![ 79 | Field::new("block_slot", DataType::UInt64, true), 80 | Field::new("block_hash", DataType::Binary, true), 81 | Field::new("transaction_index", DataType::UInt32, true), 82 | Field::new("signature", DataType::Binary, true), 83 | Field::new("version", DataType::Int8, true), 84 | Field::new( 85 | "account_keys", 86 | DataType::List(Arc::new(Field::new("item", DataType::Binary, true))), 87 | true, 88 | ), 89 | Field::new( 90 | "address_table_lookups", 91 | DataType::List(Arc::new(Field::new( 92 | "item", 93 | address_table_lookup_dt(), 94 | true, 95 | ))), 96 | true, 97 | ), 98 | Field::new("num_readonly_signed_accounts", DataType::UInt32, true), 99 | Field::new("num_readonly_unsigned_accounts", DataType::UInt32, true), 100 | Field::new("num_required_signatures", DataType::UInt32, true), 101 | Field::new("recent_blockhash", DataType::Binary, true), 102 | Field::new( 103 | "signatures", 104 | DataType::List(Arc::new(Field::new("item", DataType::Binary, true))), 105 | true, 106 | ), 107 | // encoded as json string 108 | Field::new("err", DataType::Utf8, true), 109 | Field::new("fee", DataType::UInt64, true), 110 | Field::new("compute_units_consumed", DataType::UInt64, true), 111 | Field::new( 112 | "loaded_readonly_addresses", 113 | DataType::List(Arc::new(Field::new("item", DataType::Binary, true))), 114 | true, 115 | ), 116 | Field::new( 117 | "loaded_writable_addresses", 118 | DataType::List(Arc::new(Field::new("item", DataType::Binary, true))), 119 | true, 120 | ), 121 | Field::new("fee_payer", DataType::Binary, true), 122 | Field::new("has_dropped_log_messages", DataType::Boolean, true), 123 | ]) 124 | } 125 | 126 | fn address_table_lookup_dt() -> DataType { 127 | DataType::Struct(Fields::from(vec![ 128 | Arc::new(Field::new("account_key", DataType::Binary, true)), 129 | Arc::new(Field::new( 130 | "writable_indexes", 131 | DataType::List(Arc::new(Field::new("item", DataType::UInt64, true))), 132 | true, 133 | )), 134 | Arc::new(Field::new( 135 | "readonly_indexes", 136 | DataType::List(Arc::new(Field::new("item", DataType::UInt64, true))), 137 | true, 138 | )), 139 | ])) 140 | } 141 | 142 | pub fn instructions_schema() -> Schema { 143 | Schema::new(vec![ 144 | Field::new("block_slot", DataType::UInt64, true), 145 | Field::new("block_hash", DataType::Binary, true), 146 | Field::new("transaction_index", DataType::UInt32, true), 147 | Field::new( 148 | "instruction_address", 149 | DataType::List(Arc::new(Field::new("item", DataType::UInt32, true))), 150 | true, 151 | ), 152 | Field::new("program_id", DataType::Binary, true), 153 | Field::new("a0", DataType::Binary, true), 154 | Field::new("a1", DataType::Binary, true), 155 | Field::new("a2", DataType::Binary, true), 156 | Field::new("a3", DataType::Binary, true), 157 | Field::new("a4", DataType::Binary, true), 158 | 
Field::new("a5", DataType::Binary, true), 159 | Field::new("a6", DataType::Binary, true), 160 | Field::new("a7", DataType::Binary, true), 161 | Field::new("a8", DataType::Binary, true), 162 | Field::new("a9", DataType::Binary, true), 163 | // accounts starting from index 10 164 | Field::new( 165 | "rest_of_accounts", 166 | DataType::List(Arc::new(Field::new("item", DataType::Binary, true))), 167 | true, 168 | ), 169 | Field::new("data", DataType::Binary, true), 170 | Field::new("d1", DataType::Binary, true), 171 | Field::new("d2", DataType::Binary, true), 172 | Field::new("d4", DataType::Binary, true), 173 | Field::new("d8", DataType::Binary, true), 174 | Field::new("error", DataType::Utf8, true), 175 | Field::new("compute_units_consumed", DataType::UInt64, true), 176 | Field::new("is_committed", DataType::Boolean, true), 177 | Field::new("has_dropped_log_messages", DataType::Boolean, true), 178 | ]) 179 | } 180 | 181 | #[derive(Default)] 182 | pub struct BlocksBuilder { 183 | pub slot: builder::UInt64Builder, 184 | pub hash: builder::BinaryBuilder, 185 | pub parent_slot: builder::UInt64Builder, 186 | pub parent_hash: builder::BinaryBuilder, 187 | pub height: builder::UInt64Builder, 188 | pub timestamp: builder::Int64Builder, 189 | } 190 | 191 | impl BlocksBuilder { 192 | pub fn finish(mut self) -> RecordBatch { 193 | RecordBatch::try_new( 194 | Arc::new(blocks_schema()), 195 | vec![ 196 | Arc::new(self.slot.finish()), 197 | Arc::new(self.hash.finish()), 198 | Arc::new(self.parent_slot.finish()), 199 | Arc::new(self.parent_hash.finish()), 200 | Arc::new(self.height.finish()), 201 | Arc::new(self.timestamp.finish()), 202 | ], 203 | ) 204 | .unwrap() 205 | } 206 | } 207 | 208 | #[derive(Default)] 209 | pub struct RewardsBuilder { 210 | pub block_slot: builder::UInt64Builder, 211 | pub block_hash: builder::BinaryBuilder, 212 | pub pubkey: builder::BinaryBuilder, 213 | pub lamports: builder::Int64Builder, 214 | pub post_balance: builder::UInt64Builder, 215 | pub reward_type: builder::StringBuilder, 216 | pub commission: builder::UInt8Builder, 217 | } 218 | 219 | impl RewardsBuilder { 220 | pub fn finish(mut self) -> RecordBatch { 221 | RecordBatch::try_new( 222 | Arc::new(rewards_schema()), 223 | vec![ 224 | Arc::new(self.block_slot.finish()), 225 | Arc::new(self.block_hash.finish()), 226 | Arc::new(self.pubkey.finish()), 227 | Arc::new(self.lamports.finish()), 228 | Arc::new(self.post_balance.finish()), 229 | Arc::new(self.reward_type.finish()), 230 | Arc::new(self.commission.finish()), 231 | ], 232 | ) 233 | .unwrap() 234 | } 235 | } 236 | 237 | #[derive(Default)] 238 | pub struct TokenBalancesBuilder { 239 | pub block_slot: builder::UInt64Builder, 240 | pub block_hash: builder::BinaryBuilder, 241 | pub transaction_index: builder::UInt32Builder, 242 | pub account: builder::BinaryBuilder, 243 | pub pre_mint: builder::BinaryBuilder, 244 | pub post_mint: builder::BinaryBuilder, 245 | pub pre_decimals: builder::UInt16Builder, 246 | pub post_decimals: builder::UInt16Builder, 247 | pub pre_program_id: builder::BinaryBuilder, 248 | pub post_program_id: builder::BinaryBuilder, 249 | pub pre_owner: builder::BinaryBuilder, 250 | pub post_owner: builder::BinaryBuilder, 251 | pub pre_amount: builder::UInt64Builder, 252 | pub post_amount: builder::UInt64Builder, 253 | } 254 | 255 | impl TokenBalancesBuilder { 256 | pub fn finish(mut self) -> RecordBatch { 257 | RecordBatch::try_new( 258 | Arc::new(token_balances_schema()), 259 | vec![ 260 | Arc::new(self.block_slot.finish()), 261 | 
Arc::new(self.block_hash.finish()), 262 | Arc::new(self.transaction_index.finish()), 263 | Arc::new(self.account.finish()), 264 | Arc::new(self.pre_mint.finish()), 265 | Arc::new(self.post_mint.finish()), 266 | Arc::new(self.pre_decimals.finish()), 267 | Arc::new(self.post_decimals.finish()), 268 | Arc::new(self.pre_program_id.finish()), 269 | Arc::new(self.post_program_id.finish()), 270 | Arc::new(self.pre_owner.finish()), 271 | Arc::new(self.post_owner.finish()), 272 | Arc::new(self.pre_amount.finish()), 273 | Arc::new(self.post_amount.finish()), 274 | ], 275 | ) 276 | .unwrap() 277 | } 278 | } 279 | 280 | #[derive(Default)] 281 | pub struct BalancesBuilder { 282 | pub block_slot: builder::UInt64Builder, 283 | pub block_hash: builder::BinaryBuilder, 284 | pub transaction_index: builder::UInt32Builder, 285 | pub account: builder::BinaryBuilder, 286 | pub pre: builder::UInt64Builder, 287 | pub post: builder::UInt64Builder, 288 | } 289 | 290 | impl BalancesBuilder { 291 | pub fn finish(mut self) -> RecordBatch { 292 | RecordBatch::try_new( 293 | Arc::new(balances_schema()), 294 | vec![ 295 | Arc::new(self.block_slot.finish()), 296 | Arc::new(self.block_hash.finish()), 297 | Arc::new(self.transaction_index.finish()), 298 | Arc::new(self.account.finish()), 299 | Arc::new(self.pre.finish()), 300 | Arc::new(self.post.finish()), 301 | ], 302 | ) 303 | .unwrap() 304 | } 305 | } 306 |
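// Usage sketch (editor's example; the appended values are placeholders): each
// builder mirrors its schema function, so appending one value per column and
// calling `finish` yields a one-row batch matching `balances_schema()`.
//
// let mut b = BalancesBuilder::default();
// b.block_slot.append_value(1);
// b.block_hash.append_value([0u8; 32]);
// b.transaction_index.append_value(0);
// b.account.append_value([0u8; 32]);
// b.pre.append_value(100);
// b.post.append_value(50);
// let batch = b.finish();
// assert_eq!(batch.num_rows(), 1);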
363 | impl Default for AddressTableLookupsBuilder {
364 |     fn default() -> Self {
365 |         Self(builder::ListBuilder::new(builder::StructBuilder::new(
366 |             match address_table_lookup_dt() {
367 |                 DataType::Struct(fields) => fields,
368 |                 _ => unreachable!(),
369 |             },
370 |             vec![
371 |                 Box::new(builder::BinaryBuilder::default()),
372 |                 Box::new(builder::ListBuilder::new(builder::UInt64Builder::default())),
373 |                 Box::new(builder::ListBuilder::new(builder::UInt64Builder::default())),
374 |             ],
375 |         )))
376 |     }
377 | }
378 | 
379 | impl TransactionsBuilder {
380 |     pub fn finish(mut self) -> RecordBatch {
381 |         RecordBatch::try_new(
382 |             Arc::new(transactions_schema()),
383 |             vec![
384 |                 Arc::new(self.block_slot.finish()),
385 |                 Arc::new(self.block_hash.finish()),
386 |                 Arc::new(self.transaction_index.finish()),
387 |                 Arc::new(self.signature.finish()),
388 |                 Arc::new(self.version.finish()),
389 |                 Arc::new(self.account_keys.finish()),
390 |                 Arc::new(self.address_table_lookups.0.finish()),
391 |                 Arc::new(self.num_readonly_signed_accounts.finish()),
392 |                 Arc::new(self.num_readonly_unsigned_accounts.finish()),
393 |                 Arc::new(self.num_required_signatures.finish()),
394 |                 Arc::new(self.recent_blockhash.finish()),
395 |                 Arc::new(self.signatures.finish()),
396 |                 Arc::new(self.err.finish()),
397 |                 Arc::new(self.fee.finish()),
398 |                 Arc::new(self.compute_units_consumed.finish()),
399 |                 Arc::new(self.loaded_readonly_addresses.finish()),
400 |                 Arc::new(self.loaded_writable_addresses.finish()),
401 |                 Arc::new(self.fee_payer.finish()),
402 |                 Arc::new(self.has_dropped_log_messages.finish()),
403 |             ],
404 |         )
405 |         .unwrap()
406 |     }
407 | }
408 | 
409 | #[derive(Default)]
410 | pub struct InstructionsBuilder {
411 |     pub block_slot: builder::UInt64Builder,
412 |     pub block_hash: builder::BinaryBuilder,
413 |     pub transaction_index: builder::UInt32Builder,
414 |     pub instruction_address: builder::ListBuilder<builder::UInt32Builder>,
415 |     pub program_id: builder::BinaryBuilder,
416 |     pub a0: builder::BinaryBuilder,
417 |     pub a1: builder::BinaryBuilder,
418 |     pub a2: builder::BinaryBuilder,
419 |     pub a3: builder::BinaryBuilder,
420 |     pub a4: builder::BinaryBuilder,
421 |     pub a5: builder::BinaryBuilder,
422 |     pub a6: builder::BinaryBuilder,
423 |     pub a7: builder::BinaryBuilder,
424 |     pub a8: builder::BinaryBuilder,
425 |     pub a9: builder::BinaryBuilder,
426 |     pub rest_of_accounts: builder::ListBuilder<builder::BinaryBuilder>,
427 |     pub data: builder::BinaryBuilder,
428 |     pub d1: builder::BinaryBuilder,
429 |     pub d2: builder::BinaryBuilder,
430 |     pub d4: builder::BinaryBuilder,
431 |     pub d8: builder::BinaryBuilder,
432 |     pub error: builder::StringBuilder,
433 |     pub compute_units_consumed: builder::UInt64Builder,
434 |     pub is_committed: builder::BooleanBuilder,
435 |     pub has_dropped_log_messages: builder::BooleanBuilder,
436 | }
437 | 
438 | impl InstructionsBuilder {
439 |     pub fn finish(mut self) -> RecordBatch {
440 |         RecordBatch::try_new(
441 |             Arc::new(instructions_schema()),
442 |             vec![
443 |                 Arc::new(self.block_slot.finish()),
444 |                 Arc::new(self.block_hash.finish()),
445 |                 Arc::new(self.transaction_index.finish()),
446 |                 Arc::new(self.instruction_address.finish()),
447 |                 Arc::new(self.program_id.finish()),
448 |                 Arc::new(self.a0.finish()),
449 |                 Arc::new(self.a1.finish()),
450 |                 Arc::new(self.a2.finish()),
451 |                 Arc::new(self.a3.finish()),
452 |                 Arc::new(self.a4.finish()),
453 |                 Arc::new(self.a5.finish()),
454 |                 Arc::new(self.a6.finish()),
455 |                 Arc::new(self.a7.finish()),
456 |                 Arc::new(self.a8.finish()),
457 |                 Arc::new(self.a9.finish()),
458 |                 Arc::new(self.rest_of_accounts.finish()),
459 |                 Arc::new(self.data.finish()),
460 |                 Arc::new(self.d1.finish()),
461 |                 Arc::new(self.d2.finish()),
462 |                 Arc::new(self.d4.finish()),
463 |                 Arc::new(self.d8.finish()),
464 |                 Arc::new(self.error.finish()),
465 |                 Arc::new(self.compute_units_consumed.finish()),
466 |                 Arc::new(self.is_committed.finish()),
467 |                 Arc::new(self.has_dropped_log_messages.finish()),
468 |             ],
469 |         )
470 |         .unwrap()
471 |     }
472 | }
473 | 
474 | #[cfg(test)]
475 | mod tests {
476 |     use super::*;
477 | 
478 |     #[test]
479 |     fn smoke() {
480 |         BlocksBuilder::default().finish();
481 |         RewardsBuilder::default().finish();
482 |         TokenBalancesBuilder::default().finish();
483 |         BalancesBuilder::default().finish();
484 |         LogsBuilder::default().finish();
485 |         TransactionsBuilder::default().finish();
486 |         InstructionsBuilder::default().finish();
487 |     }
488 | }
489 | 
--------------------------------------------------------------------------------
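The builder structs in this file pair arrow column builders with the matching schema; each `finish()` hands the finished columns to `RecordBatch::try_new` and relies on the `unwrap()` to catch mismatched column lengths. A minimal usage sketch with hypothetical values, assuming the `cherry_svm_schema` crate name (from the Cargo.toml above) and arrow's `append_value`/`append_null` builder API:

```rust
use arrow::record_batch::RecordBatch;
use cherry_svm_schema::BlocksBuilder;

fn one_block() -> RecordBatch {
    let mut b = BlocksBuilder::default();
    // Append exactly one value (or null) per column per row; finish()
    // unwraps RecordBatch::try_new, which rejects diverging column lengths.
    b.slot.append_value(332_000_000);
    b.hash.append_value([0u8; 32]); // placeholder hash bytes
    b.parent_slot.append_value(331_999_999);
    b.parent_hash.append_value([0u8; 32]);
    b.height.append_null(); // every column in blocks_schema() is nullable
    b.timestamp.append_value(1_700_000_000);
    b.finish()
}
```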
/python/src/lib.rs:
--------------------------------------------------------------------------------
1 | use std::sync::LazyLock;
2 | 
3 | use anyhow::{anyhow, Context};
4 | use arrow::array::{
5 |     Array, ArrayData, BinaryArray, Decimal256Array, LargeBinaryArray, LargeStringArray,
6 |     RecordBatch, StringArray,
7 | };
8 | use arrow::datatypes::{DataType, Schema};
9 | use arrow::pyarrow::{FromPyArrow, ToPyArrow};
10 | use baselib::svm_decode::{InstructionSignature, LogSignature};
11 | use pyo3::prelude::*;
12 | use tikv_jemallocator::Jemalloc;
13 | 
14 | mod ingest;
15 | 
16 | #[global_allocator]
17 | static GLOBAL: Jemalloc = Jemalloc;
18 | 
19 | static TOKIO_RUNTIME: LazyLock<tokio::runtime::Runtime> = LazyLock::new(|| {
20 |     tokio::runtime::Builder::new_multi_thread()
21 |         .enable_all()
22 |         .build()
23 |         .unwrap()
24 | });
25 | 
26 | #[pymodule]
27 | fn cherry_core(py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
28 |     env_logger::try_init().ok();
29 | 
30 |     m.add_function(wrap_pyfunction!(cast, m)?)?;
31 |     m.add_function(wrap_pyfunction!(cast_schema, m)?)?;
32 |     m.add_function(wrap_pyfunction!(cast_by_type, m)?)?;
33 |     m.add_function(wrap_pyfunction!(cast_schema_by_type, m)?)?;
34 |     m.add_function(wrap_pyfunction!(base58_encode, m)?)?;
35 |     m.add_function(wrap_pyfunction!(base58_encode_column, m)?)?;
36 |     m.add_function(wrap_pyfunction!(hex_encode, m)?)?;
37 |     m.add_function(wrap_pyfunction!(prefix_hex_encode, m)?)?;
38 |     m.add_function(wrap_pyfunction!(hex_encode_column, m)?)?;
39 |     m.add_function(wrap_pyfunction!(prefix_hex_encode_column, m)?)?;
40 |     m.add_function(wrap_pyfunction!(base58_decode_column, m)?)?;
41 |     m.add_function(wrap_pyfunction!(hex_decode_column, m)?)?;
42 |     m.add_function(wrap_pyfunction!(prefix_hex_decode_column, m)?)?;
43 |     m.add_function(wrap_pyfunction!(u256_column_from_binary, m)?)?;
44 |     m.add_function(wrap_pyfunction!(u256_column_to_binary, m)?)?;
45 |     m.add_function(wrap_pyfunction!(u256_to_binary, m)?)?;
46 |     m.add_function(wrap_pyfunction!(svm_decode_instructions, m)?)?;
47 |     m.add_function(wrap_pyfunction!(svm_decode_logs, m)?)?;
48 |     m.add_function(wrap_pyfunction!(instruction_signature_to_arrow_schema, m)?)?;
49 |     m.add_function(wrap_pyfunction!(evm_decode_call_inputs, m)?)?;
50 |     m.add_function(wrap_pyfunction!(evm_decode_call_outputs, m)?)?;
51 |     m.add_function(wrap_pyfunction!(evm_decode_events, m)?)?;
52 |     m.add_function(wrap_pyfunction!(evm_event_signature_to_arrow_schema, m)?)?;
53 |     m.add_function(wrap_pyfunction!(
54 |         evm_function_signature_to_arrow_schemas,
55 |         m
56 |     )?)?;
57 |     m.add_function(wrap_pyfunction!(evm_signature_to_topic0, m)?)?;
58 |     m.add_function(wrap_pyfunction!(base58_encode_bytes, m)?)?;
59 |     m.add_function(wrap_pyfunction!(base58_decode_string, m)?)?;
60 |     ingest::ingest_module(py, m)?;
61 | 
62 |     Ok(())
63 | }
64 | 
65 | struct CastDataType(DataType);
66 | 
67 | impl<'py> pyo3::FromPyObject<'py> for CastDataType {
68 |     fn extract_bound(ob: &pyo3::Bound<'py, pyo3::PyAny>) -> pyo3::PyResult<Self> {
69 |         let dt = DataType::from_pyarrow_bound(ob)?;
70 |         Ok(Self(dt))
71 |     }
72 | }
73 | 
74 | #[pyfunction]
75 | fn cast(
76 |     map: Vec<(String, CastDataType)>,
77 |     batch: &Bound<'_, PyAny>,
78 |     allow_cast_fail: bool,
79 |     py: Python<'_>,
80 | ) -> PyResult<PyObject> {
81 |     let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?;
82 |     let map = map
83 |         .into_iter()
84 |         .map(|(name, dt)| (name, dt.0))
85 |         .collect::<Vec<_>>();
86 | 
87 |     let batch = baselib::cast::cast(&map, &batch, allow_cast_fail).context("cast")?;
88 | 
89 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
90 | }
91 | 
92 | #[pyfunction]
93 | fn cast_schema(
94 |     map: Vec<(String, CastDataType)>,
95 |     schema: &Bound<'_, PyAny>,
96 |     py: Python<'_>,
97 | ) -> PyResult<PyObject> {
98 |     let schema = Schema::from_pyarrow_bound(schema).context("convert schema from pyarrow")?;
99 |     let map = map
100 |         .into_iter()
101 |         .map(|(name, dt)| (name, dt.0))
102 |         .collect::<Vec<_>>();
103 | 
104 |     let schema = baselib::cast::cast_schema(&map, &schema).context("cast")?;
105 | 
106 |     Ok(schema
107 |         .to_pyarrow(py)
108 |         .context("map result back to pyarrow")?)
109 | }
110 | 
111 | #[pyfunction]
112 | fn cast_by_type(
113 |     batch: &Bound<'_, PyAny>,
114 |     from_type: &Bound<'_, PyAny>,
115 |     to_type: &Bound<'_, PyAny>,
116 |     allow_cast_fail: bool,
117 |     py: Python<'_>,
118 | ) -> PyResult<PyObject> {
119 |     let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?;
120 | 
121 |     let from_type =
122 |         DataType::from_pyarrow_bound(from_type).context("convert from_type from pyarrow")?;
123 |     let to_type = DataType::from_pyarrow_bound(to_type).context("convert to_type from pyarrow")?;
124 | 
125 |     let batch = baselib::cast::cast_by_type(&batch, &from_type, &to_type, allow_cast_fail)
126 |         .context("cast")?;
127 | 
128 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
129 | }
130 | 
131 | #[pyfunction]
132 | fn cast_schema_by_type(
133 |     schema: &Bound<'_, PyAny>,
134 |     from_type: &Bound<'_, PyAny>,
135 |     to_type: &Bound<'_, PyAny>,
136 |     py: Python<'_>,
137 | ) -> PyResult<PyObject> {
138 |     let schema = Schema::from_pyarrow_bound(schema).context("convert schema from pyarrow")?;
139 | 
140 |     let from_type =
141 |         DataType::from_pyarrow_bound(from_type).context("convert from_type from pyarrow")?;
142 |     let to_type = DataType::from_pyarrow_bound(to_type).context("convert to_type from pyarrow")?;
143 | 
144 |     let schema =
145 |         baselib::cast::cast_schema_by_type(&schema, &from_type, &to_type).context("cast")?;
146 | 
147 |     Ok(schema
148 |         .to_pyarrow(py)
149 |         .context("map result back to pyarrow")?)
150 | }
151 | 
152 | #[pyfunction]
153 | fn hex_encode(batch: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
154 |     let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?;
155 | 
156 |     let batch = baselib::cast::hex_encode::<false>(&batch).context("encode to hex")?;
157 | 
158 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
159 | }
160 | 
161 | #[pyfunction]
162 | fn base58_encode(batch: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
163 |     let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?;
164 | 
165 |     let batch = baselib::cast::base58_encode(&batch).context("encode to base58")?;
166 | 
167 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
168 | }
169 | 
170 | #[pyfunction]
171 | fn prefix_hex_encode(batch: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
172 |     let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?;
173 | 
174 |     let batch = baselib::cast::hex_encode::<true>(&batch).context("encode to prefix hex")?;
175 | 
176 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
177 | }
178 | 
179 | #[pyfunction]
180 | fn u256_to_binary(batch: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
181 |     let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?;
182 | 
183 |     let batch = baselib::cast::u256_to_binary(&batch).context("map u256 columns to binary")?;
184 | 
185 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
186 | }
187 | 
188 | #[pyfunction]
189 | fn base58_encode_column(col: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
190 |     let mut col = ArrayData::from_pyarrow_bound(col).context("convert column from pyarrow")?;
191 | 
192 |     // Ensure data is aligned (by potentially copying the buffers).
193 |     // This is needed because some python code (for example the
194 |     // python flight client) produces unaligned buffers
195 |     // See https://github.com/apache/arrow/issues/43552 for details
196 |     //
197 |     // https://github.com/apache/arrow-rs/blob/764b34af4abf39e46575b1e8e3eaf0a36976cafb/arrow/src/pyarrow.rs#L374
198 |     col.align_buffers();
199 | 
200 |     if col.data_type() != &DataType::Binary {
201 |         return Err(anyhow!("unexpected data type {}. Expected Binary", col.data_type()).into());
202 |     }
203 |     let col = BinaryArray::from(col);
204 | 
205 |     let col = baselib::cast::base58_encode_column(&col);
206 | 
207 |     Ok(col
208 |         .into_data()
209 |         .to_pyarrow(py)
210 |         .context("map result back to pyarrow")?)
211 | }
212 | 
213 | #[pyfunction]
214 | fn hex_encode_column(col: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
215 |     hex_encode_column_impl::<false>(col, py)
216 | }
217 | 
218 | #[pyfunction]
219 | fn prefix_hex_encode_column(col: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
220 |     hex_encode_column_impl::<true>(col, py)
221 | }
222 | 
223 | fn hex_encode_column_impl<const PREFIXED: bool>(
224 |     col: &Bound<'_, PyAny>,
225 |     py: Python<'_>,
226 | ) -> PyResult<PyObject> {
227 |     let mut col = ArrayData::from_pyarrow_bound(col).context("convert column from pyarrow")?;
228 | 
229 |     // Ensure data is aligned (by potentially copying the buffers).
230 |     // This is needed because some python code (for example the
231 |     // python flight client) produces unaligned buffers
232 |     // See https://github.com/apache/arrow/issues/43552 for details
233 |     //
234 |     // https://github.com/apache/arrow-rs/blob/764b34af4abf39e46575b1e8e3eaf0a36976cafb/arrow/src/pyarrow.rs#L374
235 |     col.align_buffers();
236 | 
237 |     if col.data_type() == &DataType::Binary {
238 |         let col = BinaryArray::from(col);
239 |         let col = baselib::cast::hex_encode_column::<PREFIXED>(&col);
240 |         Ok(col
241 |             .into_data()
242 |             .to_pyarrow(py)
243 |             .context("map result back to pyarrow")?)
244 |     } else if col.data_type() == &DataType::LargeBinary {
245 |         let col = LargeBinaryArray::from(col);
246 |         let col = baselib::cast::hex_encode_column::<PREFIXED>(&col);
247 |         Ok(col
248 |             .into_data()
249 |             .to_pyarrow(py)
250 |             .context("map result back to pyarrow")?)
251 |     } else {
252 |         Err(anyhow!(
253 |             "unexpected data type {}. Expected Binary or LargeBinary",
254 |             col.data_type()
255 |         )
256 |         .into())
257 |     }
258 | }
259 | 
260 | #[pyfunction]
261 | fn base58_decode_column(col: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
262 |     let mut col = ArrayData::from_pyarrow_bound(col).context("convert column from pyarrow")?;
263 | 
264 |     // Ensure data is aligned (by potentially copying the buffers).
265 |     // This is needed because some python code (for example the
266 |     // python flight client) produces unaligned buffers
267 |     // See https://github.com/apache/arrow/issues/43552 for details
268 |     //
269 |     // https://github.com/apache/arrow-rs/blob/764b34af4abf39e46575b1e8e3eaf0a36976cafb/arrow/src/pyarrow.rs#L374
270 |     col.align_buffers();
271 | 
272 |     if col.data_type() == &DataType::Utf8 {
273 |         let col = StringArray::from(col);
274 |         let col = baselib::cast::base58_decode_column(&col).context("base58 decode")?;
275 |         Ok(col
276 |             .into_data()
277 |             .to_pyarrow(py)
278 |             .context("map result back to pyarrow")?)
279 |     } else if col.data_type() == &DataType::LargeUtf8 {
280 |         let col = LargeStringArray::from(col);
281 |         let col = baselib::cast::base58_decode_column(&col).context("base58 decode")?;
282 |         Ok(col
283 |             .into_data()
284 |             .to_pyarrow(py)
285 |             .context("map result back to pyarrow")?)
286 |     } else {
287 |         Err(anyhow!(
288 |             "unexpected data type {}. Expected String or LargeString",
289 |             col.data_type()
290 |         )
291 |         .into())
292 |     }
293 | }
294 | 
295 | #[pyfunction]
296 | fn hex_decode_column(col: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
297 |     hex_decode_column_impl::<false>(col, py)
298 | }
299 | 
300 | #[pyfunction]
301 | fn prefix_hex_decode_column(col: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult<PyObject> {
302 |     hex_decode_column_impl::<true>(col, py)
303 | }
304 | 
305 | fn hex_decode_column_impl<const PREFIXED: bool>(
306 |     col: &Bound<'_, PyAny>,
307 |     py: Python<'_>,
308 | ) -> PyResult<PyObject> {
309 |     let mut col = ArrayData::from_pyarrow_bound(col).context("convert column from pyarrow")?;
310 | 
311 |     // Ensure data is aligned (by potentially copying the buffers).
312 |     // This is needed because some python code (for example the
313 |     // python flight client) produces unaligned buffers
314 |     // See https://github.com/apache/arrow/issues/43552 for details
315 |     //
316 |     // https://github.com/apache/arrow-rs/blob/764b34af4abf39e46575b1e8e3eaf0a36976cafb/arrow/src/pyarrow.rs#L374
317 |     col.align_buffers();
318 | 
319 |     if col.data_type() == &DataType::Utf8 {
320 |         let col = StringArray::from(col);
321 |         let col = baselib::cast::hex_decode_column::<PREFIXED>(&col).context("hex decode")?;
322 |         Ok(col
323 |             .into_data()
324 |             .to_pyarrow(py)
325 |             .context("map result back to pyarrow")?)
326 |     } else if col.data_type() == &DataType::LargeUtf8 {
327 |         let col = LargeStringArray::from(col);
328 |         let col = baselib::cast::hex_decode_column::<PREFIXED>(&col).context("hex decode")?;
329 |         Ok(col
330 |             .into_data()
331 |             .to_pyarrow(py)
332 |             .context("map result back to pyarrow")?)
333 |     } else {
334 |         Err(anyhow!(
Expected String or LargeString", 336 | col.data_type() 337 | ) 338 | .into()) 339 | } 340 | } 341 | 342 | #[pyfunction] 343 | fn u256_column_from_binary(col: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult { 344 | let mut col = ArrayData::from_pyarrow_bound(col).context("convert column from pyarrow")?; 345 | 346 | // Ensure data is aligned (by potentially copying the buffers). 347 | // This is needed because some python code (for example the 348 | // python flight client) produces unaligned buffers 349 | // See https://github.com/apache/arrow/issues/43552 for details 350 | // 351 | // https://github.com/apache/arrow-rs/blob/764b34af4abf39e46575b1e8e3eaf0a36976cafb/arrow/src/pyarrow.rs#L374 352 | col.align_buffers(); 353 | 354 | if col.data_type() != &DataType::Binary { 355 | return Err(anyhow!("unexpected data type {}. Expected Binary", col.data_type()).into()); 356 | } 357 | let col = BinaryArray::from(col); 358 | 359 | let col = baselib::cast::u256_column_from_binary(&col).context("u256 from binary")?; 360 | 361 | Ok(col 362 | .into_data() 363 | .to_pyarrow(py) 364 | .context("map result back to pyarrow")?) 365 | } 366 | 367 | #[pyfunction] 368 | fn u256_column_to_binary(col: &Bound<'_, PyAny>, py: Python<'_>) -> PyResult { 369 | let mut col = ArrayData::from_pyarrow_bound(col).context("convert column from pyarrow")?; 370 | 371 | // Ensure data is aligned (by potentially copying the buffers). 372 | // This is needed because some python code (for example the 373 | // python flight client) produces unaligned buffers 374 | // See https://github.com/apache/arrow/issues/43552 for details 375 | // 376 | // https://github.com/apache/arrow-rs/blob/764b34af4abf39e46575b1e8e3eaf0a36976cafb/arrow/src/pyarrow.rs#L374 377 | col.align_buffers(); 378 | 379 | if col.data_type() != &DataType::Decimal256(76, 0) { 380 | return Err(anyhow!( 381 | "unexpected data type {}. Expected Decimal256", 382 | col.data_type() 383 | ) 384 | .into()); 385 | } 386 | let col = Decimal256Array::from(col); 387 | 388 | let col = baselib::cast::u256_column_to_binary(&col).context("u256 to binary")?; 389 | 390 | Ok(col 391 | .into_data() 392 | .to_pyarrow(py) 393 | .context("map result back to pyarrow")?) 394 | } 395 | 396 | #[pyfunction] 397 | fn svm_decode_instructions( 398 | signature: &Bound<'_, PyAny>, 399 | batch: &Bound<'_, PyAny>, 400 | allow_decode_fail: bool, 401 | py: Python<'_>, 402 | ) -> PyResult { 403 | let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?; 404 | 405 | let instruction_signature = signature.extract::()?; 406 | let batch = baselib::svm_decode::decode_instructions_batch( 407 | instruction_signature, 408 | &batch, 409 | allow_decode_fail, 410 | ) 411 | .context("decode instruction batch")?; 412 | 413 | Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?) 414 | } 415 | 416 | #[pyfunction] 417 | fn svm_decode_logs( 418 | signature: &Bound<'_, PyAny>, 419 | batch: &Bound<'_, PyAny>, 420 | allow_decode_fail: bool, 421 | py: Python<'_>, 422 | ) -> PyResult { 423 | let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?; 424 | 425 | let log_signature = signature.extract::()?; 426 | 427 | let batch = baselib::svm_decode::decode_logs_batch(log_signature, &batch, allow_decode_fail) 428 | .context("decode log batch")?; 429 | 430 | Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?) 
431 | }
432 | 
433 | #[pyfunction]
434 | fn instruction_signature_to_arrow_schema(
435 |     signature: &Bound<'_, PyAny>,
436 |     py: Python<'_>,
437 | ) -> PyResult<PyObject> {
438 |     let signature = signature.extract::<InstructionSignature>()?;
439 |     let schema = baselib::svm_decode::instruction_signature_to_arrow_schema(&signature)
440 |         .context("signature to schema")?;
441 | 
442 |     Ok(schema
443 |         .to_pyarrow(py)
444 |         .context("map result back to pyarrow")?)
445 | }
446 | 
447 | #[pyfunction]
448 | fn evm_decode_call_inputs(
449 |     signature: &str,
450 |     col: &Bound<'_, PyAny>,
451 |     allow_decode_fail: bool,
452 |     py: Python<'_>,
453 | ) -> PyResult<PyObject> {
454 |     let mut col = ArrayData::from_pyarrow_bound(col).context("convert column from pyarrow")?;
455 | 
456 |     // Ensure data is aligned (by potentially copying the buffers).
457 |     // This is needed because some python code (for example the
458 |     // python flight client) produces unaligned buffers
459 |     // See https://github.com/apache/arrow/issues/43552 for details
460 |     //
461 |     // https://github.com/apache/arrow-rs/blob/764b34af4abf39e46575b1e8e3eaf0a36976cafb/arrow/src/pyarrow.rs#L374
462 |     col.align_buffers();
463 | 
464 |     if col.data_type() != &DataType::Binary {
465 |         return Err(anyhow!("unexpected data type {}. Expected Binary", col.data_type()).into());
466 |     }
467 |     let col = BinaryArray::from(col);
468 | 
469 |     let batch = baselib::evm_decode::decode_call_inputs(signature, &col, allow_decode_fail)
470 |         .context("decode call inputs")?;
471 | 
472 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
473 | }
474 | 
475 | #[pyfunction]
476 | fn evm_decode_call_outputs(
477 |     signature: &str,
478 |     col: &Bound<'_, PyAny>,
479 |     allow_decode_fail: bool,
480 |     py: Python<'_>,
481 | ) -> PyResult<PyObject> {
482 |     let mut col = ArrayData::from_pyarrow_bound(col).context("convert column from pyarrow")?;
483 | 
484 |     // Ensure data is aligned (by potentially copying the buffers).
485 |     // This is needed because some python code (for example the
486 |     // python flight client) produces unaligned buffers
487 |     // See https://github.com/apache/arrow/issues/43552 for details
488 |     //
489 |     // https://github.com/apache/arrow-rs/blob/764b34af4abf39e46575b1e8e3eaf0a36976cafb/arrow/src/pyarrow.rs#L374
490 |     col.align_buffers();
491 | 
492 |     if col.data_type() != &DataType::Binary {
493 |         return Err(anyhow!("unexpected data type {}. Expected Binary", col.data_type()).into());
494 |     }
495 |     let col = BinaryArray::from(col);
496 | 
497 |     let batch = baselib::evm_decode::decode_call_outputs(signature, &col, allow_decode_fail)
498 |         .context("decode call outputs")?;
499 | 
500 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
501 | }
502 | 
503 | #[pyfunction]
504 | fn evm_decode_events(
505 |     signature: &str,
506 |     batch: &Bound<'_, PyAny>,
507 |     allow_decode_fail: bool,
508 |     py: Python<'_>,
509 | ) -> PyResult<PyObject> {
510 |     let batch = RecordBatch::from_pyarrow_bound(batch).context("convert batch from pyarrow")?;
511 | 
512 |     let batch = baselib::evm_decode::decode_events(signature, &batch, allow_decode_fail)
513 |         .context("decode events")?;
514 | 
515 |     Ok(batch.to_pyarrow(py).context("map result back to pyarrow")?)
516 | }
517 | 
518 | #[pyfunction]
519 | fn evm_event_signature_to_arrow_schema(signature: &str, py: Python<'_>) -> PyResult<PyObject> {
520 |     let schema = baselib::evm_decode::event_signature_to_arrow_schema(signature)
521 |         .context("signature to schema")?;
522 | 
523 |     Ok(schema
524 |         .to_pyarrow(py)
525 |         .context("map result back to pyarrow")?)
526 | }
527 | 
528 | #[pyfunction]
529 | fn evm_function_signature_to_arrow_schemas(
530 |     signature: &str,
531 |     py: Python<'_>,
532 | ) -> PyResult<(PyObject, PyObject)> {
533 |     let (input_schema, output_schema) =
534 |         baselib::evm_decode::function_signature_to_arrow_schemas(signature)
535 |             .context("signature to schemas")?;
536 | 
537 |     let input_schema = input_schema
538 |         .to_pyarrow(py)
539 |         .context("input schema to pyarrow")?;
540 |     let output_schema = output_schema
541 |         .to_pyarrow(py)
542 |         .context("output schema to pyarrow")?;
543 | 
544 |     Ok((input_schema, output_schema))
545 | }
546 | 
547 | #[pyfunction]
548 | fn evm_signature_to_topic0(signature: &str) -> PyResult<String> {
549 |     let topic0 = baselib::evm_decode::signature_to_topic0(signature)?;
550 | 
551 |     Ok(format!("0x{}", faster_hex::hex_string(topic0.as_slice())))
552 | }
553 | 
554 | #[pyfunction]
555 | fn base58_encode_bytes(bytes: &[u8]) -> String {
556 |     bs58::encode(bytes)
557 |         .with_alphabet(bs58::Alphabet::BITCOIN)
558 |         .into_string()
559 | }
560 | 
561 | #[pyfunction]
562 | fn base58_decode_string(s: &str) -> PyResult<Vec<u8>> {
563 |     bs58::decode(s)
564 |         .with_alphabet(bs58::Alphabet::BITCOIN)
565 |         .into_vec()
566 |         .context("decode bs58")
567 |         .map_err(Into::into)
568 | }
569 | 
--------------------------------------------------------------------------------
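The tail of lib.rs is a set of thin wrappers over the underlying crates, so the same calls work from Rust directly. A rough sketch of what `evm_signature_to_topic0` delegates to, assuming the `cherry_evm_decode` crate (re-exported as `evm_decode` by `cherry-core` in core/src/lib.rs) and an error type that converts into `anyhow::Error`:

```rust
fn main() -> anyhow::Result<()> {
    // topic0 is the keccak256 hash of the canonical event signature;
    // the python binding wraps this same call and prefix-hex encodes the bytes.
    let topic0 = cherry_evm_decode::signature_to_topic0("Transfer(address,address,uint256)")?;
    println!("0x{}", faster_hex::hex_string(topic0.as_slice()));
    Ok(())
}
```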