├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── cryo_mcp
│   ├── __init__.py
│   ├── server.py
│   └── sql.py
├── ethereum__blocks_22001067_to_22001067.json
├── ethereum__blocks__00001000_to_00001004.json
├── ethereum__blocks__22005903_to_22005907.json
├── pyproject.toml
├── tests
│   ├── __init__.py
│   ├── data
│   │   └── ethereum__blocks__00001000_to_00001004.parquet
│   ├── test_blocks.py
│   ├── test_contract_transactions.py
│   ├── test_cryo.py
│   ├── test_latest_block.py
│   ├── test_latest_functions.py
│   ├── test_mcp_functions.py
│   ├── test_sql.py
│   ├── test_transaction_by_hash.py
│   └── test_transactions.py
└── uv.lock

/.gitignore:
--------------------------------------------------------------------------------
1 | # Python
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.so
6 | .Python
7 | build/
8 | develop-eggs/
9 | dist/
10 | downloads/
11 | eggs/
12 | .eggs/
13 | lib/
14 | lib64/
15 | parts/
16 | sdist/
17 | var/
18 | wheels/
19 | *.egg-info/
20 | .installed.cfg
21 | *.egg
22 | .pytest_cache/
23 | .coverage
24 | htmlcov/
25 | 
26 | # Environment
27 | .env
28 | .venv
29 | env/
30 | venv/
31 | ENV/
32 | .python-version
33 | 
34 | # IDE files
35 | .idea/
36 | .vscode/
37 | *.swp
38 | *.swo
39 | 
40 | # OS specific
41 | .DS_Store
42 | Thumbs.db
43 | 
44 | # Cryo specific
45 | .cryo/
46 | 
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 | 
3 | ## 0.1.0 (Initial Release)
4 | 
5 | - Initial Cryo MCP implementation
6 | - Support for querying Ethereum blocks, transactions, and contracts
7 | - Integration with Claude Code
8 | - Command-line interface
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 z80
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Cryo MCP 🧊
2 | 
3 | A Model Context Protocol (MCP) server for the [Cryo](https://github.com/paradigmxyz/cryo) blockchain data extraction tool.
4 | 
5 | Cryo MCP allows you to access Cryo's powerful blockchain data extraction capabilities via an API server that implements MCP, making it easy to query blockchain data from any MCP-compatible client.
6 | 
7 | ## For LLM Users: SQL Query Workflow Guide
8 | 
9 | When using this MCP server to run SQL queries on blockchain data, follow this workflow:
10 | 
11 | 1. **Download data** with `query_dataset`:
12 | ```python
13 | result = query_dataset(
14 |     dataset="blocks",            # or "transactions", "logs", etc.
15 |     blocks="15000000:15001000",  # or use blocks_from_latest=100
16 |     output_format="parquet"      # important: use parquet for SQL
17 | )
18 | files = result.get("files", [])  # Get the returned file paths
19 | ```
20 | 
21 | 2. **Explore schema** with `get_sql_table_schema`:
22 | ```python
23 | # Check what columns are available in the file
24 | schema = get_sql_table_schema(files[0])
25 | # Now you can see all columns, data types, and sample data
26 | ```
27 | 
28 | 3. **Run SQL** with `query_sql`:
29 | ```python
30 | # Option 1: Simple table reference (DuckDB matches the table name to the file)
31 | sql_result = query_sql(
32 |     query="SELECT block_number, timestamp, gas_used FROM blocks",
33 |     files=files  # Pass the files from step 1
34 | )
35 | 
36 | # Option 2: Using read_parquet() with an explicit file path
37 | sql_result = query_sql(
38 |     query=f"SELECT block_number, timestamp, gas_used FROM read_parquet('{files[0]}')",
39 |     files=files  # Pass the files from step 1
40 | )
41 | ```
42 | 
43 | Alternatively, use the combined approach with `query_blockchain_sql`:
44 | ```python
45 | # Option 1: Simple table reference
46 | result = query_blockchain_sql(
47 |     sql_query="SELECT * FROM blocks",
48 |     dataset="blocks",
49 |     blocks_from_latest=100
50 | )
51 | 
52 | # Option 2: Using read_parquet()
53 | result = query_blockchain_sql(
54 |     sql_query="SELECT * FROM read_parquet('/path/to/file.parquet')",  # the path is replaced automatically
55 |     dataset="blocks",
56 |     blocks_from_latest=100
57 | )
58 | ```
59 | 
60 | For a complete working example, see [examples/sql_workflow_example.py](examples/sql_workflow_example.py).
61 | 
62 | ## Features
63 | 
64 | - **Full Cryo Dataset Access**: Query any Cryo dataset through an API server
65 | - **MCP Integration**: Works seamlessly with MCP clients
66 | - **Flexible Query Options**: Support for all major Cryo filtering and output options
67 | - **Block Range Options**: Query specific blocks, the latest block, or relative ranges
68 | - **Contract Filtering**: Filter data by contract address
69 | - **Latest Block Access**: Easy access to the latest Ethereum block data
70 | - **Multiple Output Formats**: JSON, CSV, and Parquet support
71 | - **Schema Information**: Get detailed dataset schemas and sample data
72 | - **SQL Queries**: Run SQL queries directly against downloaded blockchain data
73 | 
74 | ## Installation (Optional)
75 | 
76 | This step is not required if you run the tool directly with `uvx`.
77 | 
78 | ```bash
79 | # install with UV (recommended)
80 | uv tool install cryo-mcp
81 | ```
82 | 
83 | ## Requirements
84 | 
85 | - Python 3.8+
86 | - uv
87 | - A working installation of [Cryo](https://github.com/paradigmxyz/cryo)
88 | - Access to an Ethereum RPC endpoint
89 | - DuckDB (for SQL query functionality)
90 | 
91 | ## Quick Start
92 | 
93 | ### Usage with Claude Code
94 | 
95 | 1. Run `claude mcp add` for an interactive prompt.
96 | 2. Enter `uvx` as the command to run.
97 | 3. Enter `cryo-mcp --rpc-url <URL> [--data-dir <PATH>]` as the args.
98 | 4. Alternatively, provide `ETH_RPC_URL` and `CRYO_DATA_DIR` as environment variables instead.
99 | 
100 | New `claude` instances will now have access to cryo, configured to use your RPC endpoint and store data in the specified directory.
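For reference, a non-interactive setup might look like the following. This is a sketch with example values (the default RPC URL and data directory from this project); the exact `claude mcp add` syntax may vary between Claude Code versions:

```bash
# Register cryo-mcp with Claude Code, passing config as CLI args
claude mcp add cryo -- uvx cryo-mcp --rpc-url http://localhost:8545 --data-dir ~/.cryo-mcp/data

# Or rely on environment variables instead of CLI args
export ETH_RPC_URL=http://localhost:8545
export CRYO_DATA_DIR=~/.cryo-mcp/data
claude mcp add cryo -- uvx cryo-mcp
```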
101 | 102 | ## Available Tools 103 | 104 | Cryo MCP exposes the following MCP tools: 105 | 106 | ### `list_datasets()` 107 | 108 | Returns a list of all available Cryo datasets. 109 | 110 | Example: 111 | ```python 112 | client.list_datasets() 113 | ``` 114 | 115 | ### `query_dataset()` 116 | 117 | Query a Cryo dataset with various filtering options. 118 | 119 | Parameters: 120 | - `dataset` (str): The name of the dataset to query (e.g., 'blocks', 'transactions', 'logs') 121 | - `blocks` (str, optional): Block range specification (e.g., '1000:1010') 122 | - `start_block` (int, optional): Start block number (alternative to blocks) 123 | - `end_block` (int, optional): End block number (alternative to blocks) 124 | - `use_latest` (bool, optional): If True, query the latest block 125 | - `blocks_from_latest` (int, optional): Number of blocks from latest to include 126 | - `contract` (str, optional): Contract address to filter by 127 | - `output_format` (str, optional): Output format ('json', 'csv', 'parquet') 128 | - `include_columns` (list, optional): Columns to include alongside defaults 129 | - `exclude_columns` (list, optional): Columns to exclude from defaults 130 | 131 | Example: 132 | ```python 133 | # Get transactions from blocks 15M to 15.01M 134 | client.query_dataset('transactions', blocks='15M:15.01M') 135 | 136 | # Get logs for a specific contract from the latest 100 blocks 137 | client.query_dataset('logs', blocks_from_latest=100, contract='0x1234...') 138 | 139 | # Get just the latest block 140 | client.query_dataset('blocks', use_latest=True) 141 | ``` 142 | 143 | ### `lookup_dataset()` 144 | 145 | Get detailed information about a specific dataset, including schema and sample data. 146 | 147 | Parameters: 148 | - `name` (str): The name of the dataset to look up 149 | - `sample_start_block` (int, optional): Start block for sample data 150 | - `sample_end_block` (int, optional): End block for sample data 151 | - `use_latest_sample` (bool, optional): Use latest block for sample 152 | - `sample_blocks_from_latest` (int, optional): Number of blocks from latest for sample 153 | 154 | Example: 155 | ```python 156 | client.lookup_dataset('logs') 157 | ``` 158 | 159 | ### `get_latest_ethereum_block()` 160 | 161 | Returns information about the latest Ethereum block. 162 | 163 | Example: 164 | ```python 165 | client.get_latest_ethereum_block() 166 | ``` 167 | 168 | ### SQL Query Tools 169 | 170 | Cryo MCP includes several tools for running SQL queries against blockchain data: 171 | 172 | ### `query_sql()` 173 | 174 | Run a SQL query against downloaded blockchain data. 175 | 176 | Parameters: 177 | - `query` (str): SQL query to execute 178 | - `files` (list, optional): List of parquet file paths to query. If None, will use all files in the data directory. 179 | - `include_schema` (bool, optional): Whether to include schema information in the result 180 | 181 | Example: 182 | ```python 183 | # Run against all available files 184 | client.query_sql("SELECT * FROM read_parquet('/path/to/blocks.parquet') LIMIT 10") 185 | 186 | # Run against specific files 187 | client.query_sql( 188 | "SELECT * FROM read_parquet('/path/to/blocks.parquet') LIMIT 10", 189 | files=['/path/to/blocks.parquet'] 190 | ) 191 | ``` 192 | 193 | ### `query_blockchain_sql()` 194 | 195 | Query blockchain data using SQL, automatically downloading any required data. 
196 | 197 | Parameters: 198 | - `sql_query` (str): SQL query to execute 199 | - `dataset` (str, optional): The dataset to query (e.g., 'blocks', 'transactions') 200 | - `blocks` (str, optional): Block range specification 201 | - `start_block` (int, optional): Start block number 202 | - `end_block` (int, optional): End block number 203 | - `use_latest` (bool, optional): If True, query the latest block 204 | - `blocks_from_latest` (int, optional): Number of blocks before the latest to include 205 | - `contract` (str, optional): Contract address to filter by 206 | - `force_refresh` (bool, optional): Force download of new data even if it exists 207 | - `include_schema` (bool, optional): Include schema information in the result 208 | 209 | Example: 210 | ```python 211 | # Automatically downloads blocks data if needed, then runs the SQL query 212 | client.query_blockchain_sql( 213 | sql_query="SELECT block_number, gas_used, timestamp FROM blocks ORDER BY gas_used DESC LIMIT 10", 214 | dataset="blocks", 215 | blocks_from_latest=100 216 | ) 217 | ``` 218 | 219 | ### `list_available_sql_tables()` 220 | 221 | List all available tables that can be queried with SQL. 222 | 223 | Example: 224 | ```python 225 | client.list_available_sql_tables() 226 | ``` 227 | 228 | ### `get_sql_table_schema()` 229 | 230 | Get the schema for a specific parquet file. 231 | 232 | Parameters: 233 | - `file_path` (str): Path to the parquet file 234 | 235 | Example: 236 | ```python 237 | client.get_sql_table_schema("/path/to/blocks.parquet") 238 | ``` 239 | 240 | ### `get_sql_examples()` 241 | 242 | Get example SQL queries for different blockchain datasets. 243 | 244 | Example: 245 | ```python 246 | client.get_sql_examples() 247 | ``` 248 | 249 | ## Configuration Options 250 | 251 | When starting the Cryo MCP server, you can use these command-line options: 252 | 253 | - `--rpc-url URL`: Ethereum RPC URL (overrides ETH_RPC_URL environment variable) 254 | - `--data-dir PATH`: Directory to store downloaded data (overrides CRYO_DATA_DIR environment variable, defaults to ~/.cryo-mcp/data/) 255 | 256 | ## Environment Variables 257 | 258 | - `ETH_RPC_URL`: Default Ethereum RPC URL to use when not specified via command line 259 | - `CRYO_DATA_DIR`: Default directory to store downloaded data when not specified via command line 260 | 261 | ## Advanced Usage 262 | 263 | ### SQL Queries Against Blockchain Data 264 | 265 | Cryo MCP allows you to run powerful SQL queries against blockchain data, combining the flexibility of SQL with Cryo's data extraction capabilities: 266 | 267 | #### Two-Step SQL Query Flow 268 | 269 | You can split data extraction and querying into two separate steps: 270 | 271 | ```python 272 | # Step 1: Download data and get file paths 273 | download_result = client.query_dataset( 274 | dataset="transactions", 275 | blocks_from_latest=1000, 276 | output_format="parquet" 277 | ) 278 | 279 | # Step 2: Use the file paths to run SQL queries 280 | file_paths = download_result.get("files", []) 281 | client.query_sql( 282 | query=f""" 283 | SELECT 284 | to_address as contract_address, 285 | COUNT(*) as tx_count, 286 | SUM(gas_used) as total_gas, 287 | AVG(gas_used) as avg_gas 288 | FROM read_parquet('{file_paths[0]}') 289 | WHERE to_address IS NOT NULL 290 | GROUP BY to_address 291 | ORDER BY total_gas DESC 292 | LIMIT 20 293 | """, 294 | files=file_paths 295 | ) 296 | ``` 297 | 298 | #### Combined SQL Query Flow 299 | 300 | For convenience, you can also use the combined function that handles both steps: 301 | 302 | 
```python
303 | # Get top gas-consuming contracts
304 | client.query_blockchain_sql(
305 |     sql_query="""
306 |     SELECT
307 |         to_address as contract_address,
308 |         COUNT(*) as tx_count,
309 |         SUM(gas_used) as total_gas,
310 |         AVG(gas_used) as avg_gas
311 |     FROM read_parquet('/path/to/transactions.parquet')
312 |     WHERE to_address IS NOT NULL
313 |     GROUP BY to_address
314 |     ORDER BY total_gas DESC
315 |     LIMIT 20
316 |     """,
317 |     dataset="transactions",
318 |     blocks_from_latest=1000
319 | )
320 | 
321 | # Find blocks with the most transactions
322 | client.query_blockchain_sql(
323 |     sql_query="""
324 |     SELECT
325 |         block_number,
326 |         COUNT(*) as tx_count
327 |     FROM read_parquet('/path/to/transactions.parquet')
328 |     GROUP BY block_number
329 |     ORDER BY tx_count DESC
330 |     LIMIT 10
331 |     """,
332 |     dataset="transactions",
333 |     blocks="15M:16M"
334 | )
335 | 
336 | # Analyze event logs by topic
337 | client.query_blockchain_sql(
338 |     sql_query="""
339 |     SELECT
340 |         topic0,
341 |         COUNT(*) as event_count
342 |     FROM read_parquet('/path/to/logs.parquet')
343 |     GROUP BY topic0
344 |     ORDER BY event_count DESC
345 |     LIMIT 20
346 |     """,
347 |     dataset="logs",
348 |     blocks_from_latest=100
349 | )
350 | ```
351 | 
352 | **Note**: For SQL queries, always use `output_format="parquet"` when downloading data to ensure optimal performance with DuckDB. When using `query_blockchain_sql`, you can either use simple table names or refer to the file paths directly in your SQL with the `read_parquet()` function.
353 | 
354 | ### Querying with Block Ranges
355 | 
356 | Cryo MCP supports the full range of Cryo's block specification syntax:
357 | 
358 | ```python
359 | # Using block numbers
360 | client.query_dataset('transactions', blocks='15000000:15001000')
361 | 
362 | # Using K/M notation
363 | client.query_dataset('logs', blocks='15M:15.01M')
364 | 
365 | # Using offsets from latest
366 | client.query_dataset('blocks', blocks_from_latest=100)
367 | ```
368 | 
369 | ### Contract Filtering
370 | 
371 | Filter logs and other data by contract address:
372 | 
373 | ```python
374 | # Get all logs for USDC contract
375 | client.query_dataset('logs',
376 |                      blocks='16M:16.1M',
377 |                      contract='0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48')
378 | ```
379 | 
380 | ### Column Selection
381 | 
382 | Include only the columns you need:
383 | 
384 | ```python
385 | # Get just block numbers and timestamps
386 | client.query_dataset('blocks',
387 |                      blocks='16M:16.1M',
388 |                      include_columns=['number', 'timestamp'])
389 | ```
390 | 
391 | ## Development
392 | 
393 | ### Project Structure
394 | 
395 | ```
396 | cryo-mcp/
397 | ├── cryo_mcp/           # Main package directory
398 | │   ├── __init__.py     # Package initialization
399 | │   ├── server.py       # Main MCP server implementation
400 | │   └── sql.py          # SQL query functionality
401 | ├── tests/              # Test directory
402 | │   └── test_*.py       # Test files
403 | ├── pyproject.toml      # Project configuration
404 | └── README.md           # Project documentation
405 | ```
406 | 
407 | ### Run Tests
408 | 
409 | `uv run pytest`
410 | 
411 | ## License
412 | 
413 | MIT
414 | 
415 | ## Credits
416 | 
417 | - Built on top of the amazing [Cryo](https://github.com/paradigmxyz/cryo) tool by Paradigm
418 | - Uses the [Model Context Protocol](https://modelcontextprotocol.io) for API communication
419 | 
--------------------------------------------------------------------------------
/cryo_mcp/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Cryo MCP Server - Query Ethereum blockchain data using cryo and MCP
3 | """
4 | 
5 | __version__ = "0.1.4"
"0.1.4" 6 | -------------------------------------------------------------------------------- /cryo_mcp/server.py: -------------------------------------------------------------------------------- 1 | # cryo_mcp/server.py 2 | """ 3 | Cryo MCP - A Model Completion Protocol server for the Cryo blockchain data extraction tool. 4 | 5 | This module provides a server that exposes Cryo's functionality through the MCP protocol, 6 | allowing blockchain data querying through an API interface geared at usage by LLMs. 7 | """ 8 | import json 9 | import os 10 | import subprocess 11 | import requests 12 | import argparse 13 | import sys 14 | from pathlib import Path 15 | from typing import List, Optional, Dict, Any, Union 16 | from mcp.server.fastmcp import FastMCP 17 | 18 | # Get the default RPC URL from environment or use fallback 19 | DEFAULT_RPC_URL = "http://localhost:8545" 20 | 21 | # Default data directory for storing output 22 | DEFAULT_DATA_DIR = str(Path.home() / ".cryo-mcp" / "data") 23 | 24 | # Create an MCP server 25 | mcp = FastMCP("Cryo Data Server") 26 | 27 | def get_latest_block_number() -> Optional[int]: 28 | """Get the latest block number from the Ethereum node""" 29 | rpc_url = os.environ.get("ETH_RPC_URL", DEFAULT_RPC_URL) 30 | 31 | payload = { 32 | "jsonrpc": "2.0", 33 | "method": "eth_blockNumber", 34 | "params": [], 35 | "id": 1 36 | } 37 | 38 | try: 39 | response = requests.post(rpc_url, json=payload) 40 | response_data = response.json() 41 | 42 | if 'result' in response_data: 43 | # Convert hex to int 44 | latest_block = int(response_data['result'], 16) 45 | print(f"Latest block number: {latest_block}") 46 | return latest_block 47 | else: 48 | print(f"Error fetching latest block: {response_data.get('error', 'Unknown error')}") 49 | return None 50 | except Exception as e: 51 | print(f"Exception when fetching latest block: {e}") 52 | return None 53 | 54 | @mcp.tool() 55 | def list_datasets() -> List[str]: 56 | """Return a list of all available cryo datasets""" 57 | # Ensure we have the RPC URL 58 | rpc_url = os.environ.get("ETH_RPC_URL", DEFAULT_RPC_URL) 59 | 60 | result = subprocess.run( 61 | ["cryo", "help", "datasets", "-r", rpc_url], 62 | capture_output=True, 63 | text=True 64 | ) 65 | 66 | # Parse the output to extract dataset names 67 | lines = result.stdout.split('\n') 68 | datasets = [] 69 | 70 | for line in lines: 71 | if line.startswith('- ') and not line.startswith('- blocks_and_transactions:'): 72 | # Extract dataset name, removing any aliases 73 | dataset = line[2:].split(' (alias')[0].strip() 74 | datasets.append(dataset) 75 | if line == 'dataset group names': 76 | break 77 | 78 | return datasets 79 | 80 | @mcp.tool() 81 | def query_dataset( 82 | dataset: str, 83 | blocks: Optional[str] = None, 84 | start_block: Optional[int] = None, 85 | end_block: Optional[int] = None, 86 | use_latest: bool = False, 87 | blocks_from_latest: Optional[int] = None, 88 | contract: Optional[str] = None, 89 | output_format: str = "json", 90 | include_columns: Optional[List[str]] = None, 91 | exclude_columns: Optional[List[str]] = None 92 | ) -> Dict[str, Any]: 93 | """ 94 | Download blockchain data and return the file paths where the data is stored. 95 | 96 | IMPORTANT WORKFLOW NOTE: When running SQL queries, use this function first to download 97 | data, then use the returned file paths with query_sql() to execute SQL on those files. 98 | 99 | Example workflow for SQL: 100 | 1. 
First download data: result = query_dataset('transactions', blocks='1000:1010', output_format='parquet') 101 | 2. Get file paths: files = result.get('files', []) 102 | 3. Run SQL query: query_sql("SELECT * FROM read_parquet('/path/to/file.parquet')", files=files) 103 | 104 | DATASET-SPECIFIC PARAMETERS: 105 | For datasets that require specific address parameters (like 'balances', 'erc20_transfers', etc.), 106 | ALWAYS use the 'contract' parameter to pass ANY Ethereum address. For example: 107 | 108 | - For 'balances' dataset: Use contract parameter for the address you want balances for 109 | query_dataset('balances', blocks='1000:1010', contract='0x123...') 110 | 111 | - For 'logs' or 'erc20_transfers': Use contract parameter for contract address 112 | query_dataset('logs', blocks='1000:1010', contract='0x123...') 113 | 114 | To check what parameters a dataset requires, always use lookup_dataset() first: 115 | lookup_dataset('balances') # Will show required parameters 116 | 117 | Args: 118 | dataset: The name of the dataset to query (e.g., 'logs', 'transactions', 'balances') 119 | blocks: Block range specification as a string (e.g., '1000:1010') 120 | start_block: Start block number as integer (alternative to blocks) 121 | end_block: End block number as integer (alternative to blocks) 122 | use_latest: If True, query the latest block 123 | blocks_from_latest: Number of blocks before the latest to include (e.g., 10 = latest-10 to latest) 124 | contract: Contract address to filter by - IMPORTANT: Use this parameter for ALL address-based filtering 125 | regardless of the parameter name in the native cryo command (address, contract, etc.) 126 | output_format: Output format (json, csv, parquet) - use 'parquet' for SQL queries 127 | include_columns: Columns to include alongside the defaults 128 | exclude_columns: Columns to exclude from the defaults 129 | 130 | Returns: 131 | Dictionary containing file paths where the downloaded data is stored 132 | """ 133 | # Ensure we have the RPC URL 134 | rpc_url = os.environ.get("ETH_RPC_URL", DEFAULT_RPC_URL) 135 | 136 | # Build the cryo command 137 | cmd = ["cryo", dataset, "-r", rpc_url] 138 | 139 | # Handle block range (priority: blocks > use_latest > start/end_block > default) 140 | if blocks: 141 | # Use specified block range string directly 142 | cmd.extend(["-b", blocks]) 143 | elif use_latest or blocks_from_latest is not None: 144 | # Get the latest block number 145 | latest_block = get_latest_block_number() 146 | 147 | if latest_block is None: 148 | return {"error": "Failed to get the latest block number from the RPC endpoint"} 149 | 150 | if blocks_from_latest is not None: 151 | # Use a range of blocks up to the latest 152 | start = latest_block - blocks_from_latest 153 | block_range = f"{start}:{latest_block+1}" # +1 to make it inclusive 154 | else: 155 | # Just the latest block 156 | block_range = f"{latest_block}:{latest_block+1}" # +1 to make it inclusive 157 | 158 | print(f"Using latest block range: {block_range}") 159 | cmd.extend(["-b", block_range]) 160 | elif start_block is not None: 161 | # Convert integer block numbers to string range 162 | if end_block is not None: 163 | # Note: cryo uses [start:end) range (inclusive start, exclusive end) 164 | # Add 1 to end_block to include it in the range 165 | block_range = f"{start_block}:{end_block+1}" 166 | else: 167 | # If only start_block is provided, get 10 blocks starting from there 168 | block_range = f"{start_block}:{start_block+10}" 169 | 170 | print(f"Using block range: {block_range}") 
171 | cmd.extend(["-b", block_range]) 172 | else: 173 | # Default to a reasonable block range if none specified 174 | cmd.extend(["-b", "1000:1010"]) 175 | 176 | # Handle dataset-specific address parameters 177 | # For all address-based filters, we use the contract parameter 178 | # but map it to the correct flag based on the dataset 179 | if contract: 180 | # Check if this is a dataset that requires a different parameter name 181 | if dataset == 'balances': 182 | # For balances dataset, contract parameter maps to --address 183 | cmd.extend(["--address", contract]) 184 | else: 185 | # For other datasets like logs, transactions, etc. use --contract 186 | cmd.extend(["--contract", contract]) 187 | 188 | if output_format == "json": 189 | cmd.append("--json") 190 | elif output_format == "csv": 191 | cmd.append("--csv") 192 | 193 | if include_columns: 194 | cmd.append("--include-columns") 195 | cmd.extend(include_columns) 196 | 197 | if exclude_columns: 198 | cmd.append("--exclude-columns") 199 | cmd.extend(exclude_columns) 200 | 201 | # Get the base data directory 202 | data_dir = Path(os.environ.get("CRYO_DATA_DIR", DEFAULT_DATA_DIR)) 203 | 204 | # Choose output directory based on whether we're querying latest blocks 205 | if use_latest or blocks_from_latest is not None: 206 | output_dir = data_dir / "latest" 207 | output_dir.mkdir(parents=True, exist_ok=True) 208 | 209 | # Clean up the latest directory before new query 210 | print("Cleaning latest directory for current block query") 211 | existing_files = list(output_dir.glob(f"*{dataset}*.*")) 212 | for file in existing_files: 213 | try: 214 | file.unlink() 215 | print(f"Removed existing file: {file}") 216 | except Exception as e: 217 | print(f"Warning: Could not remove file {file}: {e}") 218 | else: 219 | # For historical queries, use the main data directory 220 | output_dir = data_dir 221 | output_dir.mkdir(parents=True, exist_ok=True) 222 | 223 | cmd.extend(["-o", str(output_dir)]) 224 | 225 | # Print the command for debugging 226 | print(f"Running query command: {' '.join(cmd)}") 227 | 228 | # Execute the command 229 | result = subprocess.run(cmd, capture_output=True, text=True) 230 | 231 | if result.returncode != 0: 232 | return { 233 | "error": result.stderr, 234 | "stdout": result.stdout, 235 | "command": " ".join(cmd) 236 | } 237 | 238 | # Try to find the report file which contains info about generated files 239 | report_dir = output_dir / ".cryo" / "reports" 240 | if report_dir.exists(): 241 | # Get the most recent report file (should be the one we just created) 242 | report_files = sorted(report_dir.glob("*.json"), key=lambda x: x.stat().st_mtime, reverse=True) 243 | if report_files: 244 | with open(report_files[0], 'r') as f: 245 | report_data = json.load(f) 246 | # Get the list of completed files from the report 247 | if "results" in report_data and "completed_paths" in report_data["results"]: 248 | completed_files = report_data["results"]["completed_paths"] 249 | print(f"Found {len(completed_files)} files in Cryo report: {completed_files}") 250 | 251 | # Return the list of files and their count 252 | return { 253 | "files": completed_files, 254 | "count": len(completed_files), 255 | "format": output_format 256 | } 257 | 258 | # Fallback to glob search if report file not found or doesn't contain the expected data 259 | output_files = list(output_dir.glob(f"*{dataset}*.{output_format}")) 260 | print(f"Output files found via glob: {output_files}") 261 | 262 | if not output_files: 263 | return {"error": "No output files generated", 
"command": " ".join(cmd)} 264 | 265 | # Convert Path objects to strings for JSON serialization 266 | file_paths = [str(file_path) for file_path in output_files] 267 | 268 | return { 269 | "files": file_paths, 270 | "count": len(file_paths), 271 | "format": output_format 272 | } 273 | 274 | @mcp.resource("dataset://{name}") 275 | def get_dataset_info(name: str) -> Dict[str, Any]: 276 | """Get information about a specific dataset""" 277 | # Ensure we have the RPC URL 278 | rpc_url = os.environ.get("ETH_RPC_URL", DEFAULT_RPC_URL) 279 | 280 | result = subprocess.run( 281 | ["cryo", "help", name, "-r", rpc_url], 282 | capture_output=True, 283 | text=True 284 | ) 285 | 286 | # Get the latest block number for examples 287 | latest_block = get_latest_block_number() 288 | latest_example = "" 289 | 290 | if latest_block: 291 | latest_example = f"query_dataset('{name}', blocks_from_latest=10) # Gets latest-10 to latest blocks" 292 | 293 | # Add special examples for datasets requiring address parameters 294 | address_example = "" 295 | if "address" in result.stdout.lower() and "required parameters: address" in result.stdout.lower(): 296 | address_example = f"query_dataset('{name}', blocks='1000:1010', contract='0x123...') # Use contract parameter for address" 297 | 298 | return { 299 | "name": name, 300 | "description": result.stdout, 301 | "example_queries": [ 302 | f"query_dataset('{name}', blocks='1000:1010')", 303 | f"query_dataset('{name}', start_block=1000, end_block=1009)", 304 | f"query_dataset('{name}', use_latest=True) # Gets just the latest block", 305 | latest_example, 306 | address_example 307 | ] if address_example else [ 308 | f"query_dataset('{name}', blocks='1000:1010')", 309 | f"query_dataset('{name}', start_block=1000, end_block=1009)", 310 | f"query_dataset('{name}', use_latest=True) # Gets just the latest block", 311 | latest_example 312 | ], 313 | "notes": [ 314 | "Block ranges are inclusive for start_block and end_block when using integer parameters.", 315 | "Use 'use_latest=True' to query only the latest block.", 316 | "Use 'blocks_from_latest=N' to query the latest N blocks.", 317 | "IMPORTANT: For datasets requiring an 'address' parameter (like 'balances'), use the 'contract' parameter.", 318 | "Always check the required parameters in the dataset description and use lookup_dataset() first." 319 | ] 320 | } 321 | 322 | @mcp.tool() 323 | def lookup_dataset( 324 | name: str, 325 | sample_start_block: Optional[int] = None, 326 | sample_end_block: Optional[int] = None, 327 | use_latest_sample: bool = False, 328 | sample_blocks_from_latest: Optional[int] = None 329 | ) -> Dict[str, Any]: 330 | """ 331 | Look up a specific dataset and return detailed information about it. IMPORTANT: Always use this 332 | function before querying a new dataset to understand its required parameters and schema. 333 | 334 | The returned information includes: 335 | 1. Required parameters for the dataset (IMPORTANT for datasets like 'balances' that need an address) 336 | 2. Schema details showing available columns and data types 337 | 3. Example queries for the dataset 338 | 339 | When the dataset requires specific parameters like 'address' (for 'balances'), 340 | ALWAYS use the 'contract' parameter in query_dataset() to pass these values. 341 | 342 | Example: 343 | For 'balances' dataset, lookup_dataset('balances') will show it requires an 'address' parameter. 
344 | You should then query it using: 345 | query_dataset('balances', blocks='1000:1010', contract='0x1234...') 346 | 347 | Args: 348 | name: The name of the dataset to look up 349 | sample_start_block: Optional start block for sample data (integer) 350 | sample_end_block: Optional end block for sample data (integer) 351 | use_latest_sample: If True, use the latest block for sample data 352 | sample_blocks_from_latest: Number of blocks before the latest to include in sample 353 | 354 | Returns: 355 | Detailed information about the dataset including schema and available fields 356 | """ 357 | # Get basic dataset info 358 | info = get_dataset_info(name) 359 | 360 | # Ensure we have the RPC URL 361 | rpc_url = os.environ.get("ETH_RPC_URL", DEFAULT_RPC_URL) 362 | 363 | # Get schema information by running the dataset with --dry-run 364 | schema_result = subprocess.run( 365 | ["cryo", name, "--dry-run", "-r", rpc_url], 366 | capture_output=True, 367 | text=True 368 | ) 369 | 370 | if schema_result.returncode == 0: 371 | info["schema"] = schema_result.stdout 372 | else: 373 | info["schema_error"] = schema_result.stderr 374 | 375 | # Try to get a sample of the dataset (first 5 records) 376 | try: 377 | data_dir = Path(os.environ.get("CRYO_DATA_DIR", DEFAULT_DATA_DIR)) 378 | 379 | # Determine block range for sample (priority: latest > specified blocks > default) 380 | if use_latest_sample or sample_blocks_from_latest is not None: 381 | # Get the latest block number 382 | latest_block = get_latest_block_number() 383 | 384 | if latest_block is None: 385 | info["sample_error"] = "Failed to get the latest block number from the RPC endpoint" 386 | return info 387 | 388 | if sample_blocks_from_latest is not None: 389 | # Use a range of blocks from latest-n to latest 390 | block_range = f"{latest_block - sample_blocks_from_latest}:{latest_block+1}" 391 | else: 392 | # Just the latest 5 blocks 393 | block_range = f"{latest_block-4}:{latest_block+1}" 394 | 395 | info["sample_block_range"] = block_range 396 | 397 | # Use the latest directory for latest block samples 398 | sample_dir = data_dir / "latest" 399 | sample_dir.mkdir(parents=True, exist_ok=True) 400 | 401 | # Clean up the latest directory before new query 402 | print("Cleaning latest directory for current sample") 403 | existing_files = list(sample_dir.glob(f"*{name}*.*")) 404 | for file in existing_files: 405 | try: 406 | file.unlink() 407 | print(f"Removed existing sample file: {file}") 408 | except Exception as e: 409 | print(f"Warning: Could not remove sample file {file}: {e}") 410 | else: 411 | # For historical blocks, get the start block and end block 412 | if sample_start_block is not None: 413 | if sample_end_block is not None: 414 | # Note: cryo uses [start:end) range (inclusive start, exclusive end) 415 | # Add 1 to end_block to include it in the range 416 | block_range = f"{sample_start_block}:{sample_end_block+1}" 417 | else: 418 | # Use start block and get 5 blocks 419 | block_range = f"{sample_start_block}:{sample_start_block+5}" 420 | else: 421 | # Default to a known good block range 422 | block_range = "1000:1005" 423 | 424 | # For historical samples, use the main data directory 425 | sample_dir = data_dir 426 | sample_dir.mkdir(parents=True, exist_ok=True) 427 | 428 | # Use the block range for the sample 429 | sample_cmd = [ 430 | "cryo", name, 431 | "-b", block_range, 432 | "-r", rpc_url, 433 | "--json", 434 | "-o", str(sample_dir) 435 | ] 436 | 437 | print(f"Running sample command: {' '.join(sample_cmd)}") 438 | sample_result = 
subprocess.run( 439 | sample_cmd, 440 | capture_output=True, 441 | text=True, 442 | timeout=30 # Add timeout to prevent hanging 443 | ) 444 | 445 | if sample_result.returncode == 0: 446 | # Try to find the report file which contains info about generated files 447 | report_dir = sample_dir / ".cryo" / "reports" 448 | if report_dir.exists(): 449 | # Get the most recent report file 450 | report_files = sorted(report_dir.glob("*.json"), key=lambda x: x.stat().st_mtime, reverse=True) 451 | if report_files: 452 | with open(report_files[0], 'r') as f: 453 | report_data = json.load(f) 454 | # Get the list of completed files from the report 455 | if "results" in report_data and "completed_paths" in report_data["results"]: 456 | completed_files = report_data["results"]["completed_paths"] 457 | print(f"Found {len(completed_files)} files in Cryo report: {completed_files}") 458 | info["sample_files"] = completed_files 459 | return info 460 | 461 | # Fallback to glob search if report file not found 462 | output_files = list(sample_dir.glob(f"*{name}*.json")) 463 | print(f"Output files found via glob: {output_files}") 464 | 465 | if output_files: 466 | # Convert Path objects to strings for JSON serialization 467 | file_paths = [str(file_path) for file_path in output_files] 468 | info["sample_files"] = file_paths 469 | else: 470 | info["sample_error"] = "No output files generated" 471 | else: 472 | info["sample_error"] = sample_result.stderr 473 | info["sample_stdout"] = sample_result.stdout # Include stdout for debugging 474 | except (subprocess.TimeoutExpired, Exception) as e: 475 | info["sample_error"] = str(e) 476 | 477 | return info 478 | 479 | @mcp.tool() 480 | def get_transaction_by_hash( 481 | tx_hash: str 482 | ) -> Dict[str, Any]: 483 | """ 484 | Get detailed information about a transaction by its hash 485 | 486 | Args: 487 | tx_hash: The transaction hash to look up 488 | 489 | Returns: 490 | Detailed information about the transaction 491 | """ 492 | # Ensure we have the RPC URL 493 | rpc_url = os.environ.get("ETH_RPC_URL", DEFAULT_RPC_URL) 494 | 495 | # Use RPC directly to get the transaction 496 | payload = { 497 | "jsonrpc": "2.0", 498 | "method": "eth_getTransactionByHash", 499 | "params": [tx_hash], 500 | "id": 1 501 | } 502 | 503 | try: 504 | response = requests.post(rpc_url, json=payload) 505 | response_data = response.json() 506 | 507 | if 'result' in response_data and response_data['result']: 508 | tx_data = response_data['result'] 509 | 510 | # Get the receipt as well for additional information (gas used, status) 511 | receipt_payload = { 512 | "jsonrpc": "2.0", 513 | "method": "eth_getTransactionReceipt", 514 | "params": [tx_hash], 515 | "id": 2 516 | } 517 | 518 | receipt_response = requests.post(rpc_url, json=receipt_payload) 519 | receipt_data = receipt_response.json() 520 | 521 | if 'result' in receipt_data and receipt_data['result']: 522 | receipt = receipt_data['result'] 523 | 524 | # Combine transaction and receipt data 525 | result = { 526 | "transaction_hash": tx_hash, 527 | "block_number": int(tx_data.get("blockNumber", "0x0"), 16), 528 | "block_hash": tx_data.get("blockHash"), 529 | "from_address": tx_data.get("from"), 530 | "to_address": tx_data.get("to"), 531 | "value": tx_data.get("value"), 532 | "value_decimal": int(tx_data.get("value", "0x0"), 16), 533 | "gas_limit": int(tx_data.get("gas", "0x0"), 16), 534 | "gas_price": int(tx_data.get("gasPrice", "0x0"), 16), 535 | "nonce": int(tx_data.get("nonce", "0x0"), 16), 536 | "input": tx_data.get("input"), 537 | 
"transaction_index": int(tx_data.get("transactionIndex", "0x0"), 16), 538 | "gas_used": int(receipt.get("gasUsed", "0x0"), 16), 539 | "status": int(receipt.get("status", "0x0"), 16), 540 | "logs_count": len(receipt.get("logs", [])), 541 | "contract_address": receipt.get("contractAddress") 542 | } 543 | 544 | # Handle EIP-1559 transactions 545 | if "maxFeePerGas" in tx_data: 546 | result["max_fee_per_gas"] = int(tx_data.get("maxFeePerGas", "0x0"), 16) 547 | result["max_priority_fee_per_gas"] = int(tx_data.get("maxPriorityFeePerGas", "0x0"), 16) 548 | result["transaction_type"] = int(tx_data.get("type", "0x0"), 16) 549 | 550 | return result 551 | else: 552 | # Return just the transaction data if receipt is not available 553 | return { 554 | "transaction_hash": tx_hash, 555 | "block_number": int(tx_data.get("blockNumber", "0x0"), 16), 556 | "block_hash": tx_data.get("blockHash"), 557 | "from_address": tx_data.get("from"), 558 | "to_address": tx_data.get("to"), 559 | "value": tx_data.get("value"), 560 | "value_decimal": int(tx_data.get("value", "0x0"), 16), 561 | "gas_limit": int(tx_data.get("gas", "0x0"), 16), 562 | "gas_price": int(tx_data.get("gasPrice", "0x0"), 16), 563 | "nonce": int(tx_data.get("nonce", "0x0"), 16), 564 | "input": tx_data.get("input"), 565 | "transaction_index": int(tx_data.get("transactionIndex", "0x0"), 16), 566 | "error": "Failed to retrieve transaction receipt" 567 | } 568 | else: 569 | return {"error": f"Transaction not found: {tx_hash}"} 570 | except Exception as e: 571 | return {"error": f"Exception when fetching transaction: {e}"} 572 | 573 | @mcp.tool() 574 | def get_latest_ethereum_block() -> Dict[str, Any]: 575 | """ 576 | Get information about the latest Ethereum block 577 | 578 | Returns: 579 | Information about the latest block including block number 580 | """ 581 | latest_block = get_latest_block_number() 582 | 583 | if latest_block is None: 584 | return {"error": "Failed to get the latest block number from the RPC endpoint"} 585 | 586 | # Get block data using cryo 587 | rpc_url = os.environ.get("ETH_RPC_URL", DEFAULT_RPC_URL) 588 | block_range = f"{latest_block}:{latest_block+1}" # +1 to make it inclusive 589 | 590 | data_dir = Path(os.environ.get("CRYO_DATA_DIR", DEFAULT_DATA_DIR)) 591 | latest_dir = data_dir / "latest" 592 | latest_dir.mkdir(parents=True, exist_ok=True) 593 | 594 | # Always clean up the latest directory for latest block 595 | print("Cleaning latest directory for current block") 596 | existing_files = list(latest_dir.glob("*blocks*.*")) 597 | for file in existing_files: 598 | try: 599 | file.unlink() 600 | print(f"Removed existing file: {file}") 601 | except Exception as e: 602 | print(f"Warning: Could not remove file {file}: {e}") 603 | 604 | cmd = [ 605 | "cryo", "blocks", 606 | "-b", block_range, 607 | "-r", rpc_url, 608 | "--json", 609 | "-o", str(latest_dir) 610 | ] 611 | 612 | result = subprocess.run(cmd, capture_output=True, text=True) 613 | 614 | if result.returncode != 0: 615 | return { 616 | "block_number": latest_block, 617 | "error": "Failed to get detailed block data", 618 | "stderr": result.stderr 619 | } 620 | 621 | # Try to find the report file which contains info about generated files 622 | report_dir = latest_dir / ".cryo" / "reports" 623 | if report_dir.exists(): 624 | # Get the most recent report file 625 | report_files = sorted(report_dir.glob("*.json"), key=lambda x: x.stat().st_mtime, reverse=True) 626 | if report_files: 627 | with open(report_files[0], 'r') as f: 628 | report_data = json.load(f) 629 | # Get the 
list of completed files from the report 630 | if "results" in report_data and "completed_paths" in report_data["results"]: 631 | completed_files = report_data["results"]["completed_paths"] 632 | print(f"Found {len(completed_files)} files in Cryo report: {completed_files}") 633 | 634 | return { 635 | "block_number": latest_block, 636 | "files": completed_files, 637 | "count": len(completed_files) 638 | } 639 | 640 | # Fallback to glob search if report file not found 641 | output_files = list(latest_dir.glob("*blocks*.json")) 642 | 643 | if not output_files: 644 | return { 645 | "block_number": latest_block, 646 | "error": "No output files generated" 647 | } 648 | 649 | # Convert Path objects to strings for JSON serialization 650 | file_paths = [str(file_path) for file_path in output_files] 651 | 652 | return { 653 | "block_number": latest_block, 654 | "files": file_paths, 655 | "count": len(file_paths) 656 | } 657 | 658 | @mcp.tool() 659 | def query_sql( 660 | query: str, 661 | files: Optional[List[str]] = None, 662 | include_schema: bool = True 663 | ) -> Dict[str, Any]: 664 | """ 665 | Run a SQL query against downloaded blockchain data files 666 | 667 | IMPORTANT WORKFLOW: This function should be used after calling query_dataset 668 | to download data. Use the file paths returned by query_dataset as input to this function. 669 | 670 | Workflow steps: 671 | 1. Download data: result = query_dataset('transactions', blocks='1000:1010', output_format='parquet') 672 | 2. Get file paths: files = result.get('files', []) 673 | 3. Execute SQL using either: 674 | - Direct table references: query_sql("SELECT * FROM transactions", files=files) 675 | - Or read_parquet(): query_sql("SELECT * FROM read_parquet('/path/to/file.parquet')", files=files) 676 | 677 | To see the schema of a file, use get_sql_table_schema(file_path) before writing your query. 678 | 679 | DuckDB supports both approaches: 680 | 1. Direct table references (simpler): "SELECT * FROM blocks" 681 | 2. read_parquet function (explicit): "SELECT * FROM read_parquet('/path/to/file.parquet')" 682 | 683 | Args: 684 | query: SQL query to execute - can use simple table names or read_parquet() 685 | files: List of parquet file paths to query (typically from query_dataset results) 686 | include_schema: Whether to include schema information in the result 687 | 688 | Returns: 689 | Query results and metadata 690 | """ 691 | from cryo_mcp.sql import execute_sql_query 692 | return execute_sql_query(query, files, include_schema) 693 | 694 | @mcp.tool() 695 | def list_available_sql_tables() -> List[Dict[str, Any]]: 696 | """ 697 | List all available parquet files that can be queried with SQL 698 | 699 | USAGE NOTES: 700 | - This function lists parquet files that have already been downloaded 701 | - Each file can be queried using read_parquet('/path/to/file.parquet') in your SQL 702 | - For each file, this returns the file path, dataset type, and other metadata 703 | - Use these file paths in your SQL queries with query_sql() 704 | 705 | Returns: 706 | List of available files and their metadata 707 | """ 708 | from cryo_mcp.sql import list_available_tables 709 | return list_available_tables() 710 | 711 | @mcp.tool() 712 | def get_sql_table_schema(file_path: str) -> Dict[str, Any]: 713 | """ 714 | Get the schema and sample data for a specific parquet file 715 | 716 | WORKFLOW NOTE: Use this function to explore the structure of parquet files 717 | before writing SQL queries against them. This will show you: 718 | 1. 
All available columns and their data types 719 | 2. Sample data from the file 720 | 3. Total row count 721 | 722 | Usage example: 723 | 1. Get list of files: files = list_available_sql_tables() 724 | 2. For a specific file: schema = get_sql_table_schema(files[0]['path']) 725 | 3. Use columns in your SQL: query_sql("SELECT column1, column2 FROM read_parquet('/path/to/file.parquet')") 726 | 727 | Args: 728 | file_path: Path to the parquet file (from list_available_sql_tables or query_dataset) 729 | 730 | Returns: 731 | Table schema information including columns, data types, and sample data 732 | """ 733 | from cryo_mcp.sql import get_table_schema 734 | return get_table_schema(file_path) 735 | 736 | @mcp.tool() 737 | def query_blockchain_sql( 738 | sql_query: str, 739 | dataset: Optional[str] = None, 740 | blocks: Optional[str] = None, 741 | start_block: Optional[int] = None, 742 | end_block: Optional[int] = None, 743 | use_latest: bool = False, 744 | blocks_from_latest: Optional[int] = None, 745 | contract: Optional[str] = None, 746 | force_refresh: bool = False, 747 | include_schema: bool = True 748 | ) -> Dict[str, Any]: 749 | """ 750 | Download blockchain data and run SQL query in a single step 751 | 752 | CONVENIENCE FUNCTION: This combines query_dataset and query_sql into one call. 753 | 754 | You can write SQL queries using either approach: 755 | 1. Simple table references: "SELECT * FROM blocks LIMIT 10" 756 | 2. Explicit read_parquet: "SELECT * FROM read_parquet('/path/to/file.parquet') LIMIT 10" 757 | 758 | DATASET-SPECIFIC PARAMETERS: 759 | For datasets that require specific address parameters (like 'balances', 'erc20_transfers', etc.), 760 | ALWAYS use the 'contract' parameter to pass ANY Ethereum address. For example: 761 | 762 | - For 'balances' dataset: Use contract parameter for the address you want balances for 763 | query_blockchain_sql( 764 | sql_query="SELECT * FROM balances", 765 | dataset="balances", 766 | blocks='1000:1010', 767 | contract='0x123...' # Address you want balances for 768 | ) 769 | 770 | Examples: 771 | ``` 772 | # Using simple table name 773 | query_blockchain_sql( 774 | sql_query="SELECT * FROM blocks LIMIT 10", 775 | dataset="blocks", 776 | blocks_from_latest=100 777 | ) 778 | 779 | # Using read_parquet() (the path will be automatically replaced) 780 | query_blockchain_sql( 781 | sql_query="SELECT * FROM read_parquet('/any/path.parquet') LIMIT 10", 782 | dataset="blocks", 783 | blocks_from_latest=100 784 | ) 785 | ``` 786 | 787 | ALTERNATIVE WORKFLOW (more control): 788 | If you need more control, you can separate the steps: 789 | 1. Download data: result = query_dataset('blocks', blocks_from_latest=100, output_format='parquet') 790 | 2. Inspect schema: schema = get_sql_table_schema(result['files'][0]) 791 | 3. 
Run SQL query: query_sql("SELECT * FROM blocks", files=result['files'])
792 | 
793 |     Args:
794 |         sql_query: SQL query to execute - using table names or read_parquet()
795 |         dataset: The specific dataset to query (e.g., 'transactions', 'logs', 'balances')
796 |                 If None, will be extracted from the SQL query
797 |         blocks: Block range specification as a string (e.g., '1000:1010')
798 |         start_block: Start block number (alternative to blocks)
799 |         end_block: End block number (alternative to blocks)
800 |         use_latest: If True, query the latest block
801 |         blocks_from_latest: Number of blocks before the latest to include
802 |         contract: Contract address to filter by - IMPORTANT: Use this parameter for ALL address-based filtering
803 |                 regardless of the parameter name in the native cryo command (address, contract, etc.)
804 |         force_refresh: Force download of new data even if it exists
805 |         include_schema: Include schema information in the result
806 | 
807 |     Returns:
808 |         SQL query results and metadata
809 |     """
810 |     from cryo_mcp.sql import execute_sql_query, extract_dataset_from_sql
811 | 
812 |     # Try to determine dataset if not provided
813 |     if dataset is None:
814 |         dataset = extract_dataset_from_sql(sql_query)
815 |         if dataset is None:
816 |             return {
817 |                 "success": False,
818 |                 "error": "Could not determine dataset from SQL query. Please specify dataset parameter."
819 |             }
820 | 
821 |     # First, ensure we have the data by running a query_dataset operation
822 |     # This will download the data and return the file paths
823 |     download_result = query_dataset(
824 |         dataset=dataset,
825 |         blocks=blocks,
826 |         start_block=start_block,
827 |         end_block=end_block,
828 |         use_latest=use_latest,
829 |         blocks_from_latest=blocks_from_latest,
830 |         contract=contract,
831 |         output_format="parquet"  # Use parquet for optimal SQL performance
832 |     )
833 | 
834 |     if "error" in download_result:
835 |         return {
836 |             "success": False,
837 |             "error": f"Failed to download data: {download_result['error']}",
838 |             "download_details": download_result
839 |         }
840 | 
841 |     # Get the file paths from the download result
842 |     files = download_result.get("files", [])
843 | 
844 |     # Check if we have any files
845 |     if not files:
846 |         return {
847 |             "success": False,
848 |             "error": "No data files were generated from the download operation"
849 |         }
850 | 
851 |     # Filter for parquet files only
852 |     parquet_files = [f for f in files if f.endswith('.parquet')]
853 |     if not parquet_files:
854 |         return {
855 |             "success": False,
856 |             "error": "No parquet files were generated. Check output_format parameter."
857 |         }
858 | 
859 |     # Now execute the SQL query directly against the downloaded parquet files
860 |     sql_result = execute_sql_query(sql_query, parquet_files, include_schema)
861 | 
862 |     # Include download info in result (conditionals are right-associative, so 'blocks' takes priority)
863 |     sql_result["data_source"] = {
864 |         "dataset": dataset,
865 |         "files": files,
866 |         "block_range": blocks if blocks else f"{start_block}:{end_block}" if start_block and end_block
867 |             else "latest blocks" if use_latest or blocks_from_latest else "default range"
868 |     }
869 | 
870 |     return sql_result
871 | 
872 | @mcp.tool()
873 | def get_sql_examples() -> Dict[str, List[str]]:
874 |     """
875 |     Get example SQL queries for different blockchain datasets with DuckDB
876 | 
877 |     SQL WORKFLOW TIPS:
878 |     1. First download data: result = query_dataset('dataset_name', blocks='...', output_format='parquet')
879 |     2. Inspect schema: schema = get_sql_table_schema(result['files'][0])
880 |     3. 
Run SQL: query_sql("SELECT * FROM read_parquet('/path/to/file.parquet')", files=result['files']) 881 | 882 | OR use the combined approach: 883 | - query_blockchain_sql(sql_query="SELECT * FROM read_parquet('...')", dataset='blocks', blocks='...') 884 | 885 | Returns: 886 | Dictionary of example queries categorized by dataset type and workflow patterns 887 | """ 888 | return { 889 | "basic_usage": [ 890 | "-- Option 1: Simple table names (recommended)", 891 | "SELECT * FROM blocks LIMIT 10", 892 | "SELECT * FROM transactions LIMIT 10", 893 | "SELECT * FROM logs LIMIT 10", 894 | 895 | "-- Option 2: Using read_parquet() with explicit file paths", 896 | "SELECT * FROM read_parquet('/path/to/blocks.parquet') LIMIT 10" 897 | ], 898 | "transactions": [ 899 | "-- Option 1: Simple table reference", 900 | "SELECT * FROM transactions LIMIT 10", 901 | "SELECT block_number, COUNT(*) as tx_count FROM transactions GROUP BY block_number ORDER BY tx_count DESC LIMIT 10", 902 | 903 | "-- Option 2: Using read_parquet()", 904 | "SELECT from_address, COUNT(*) as sent_count FROM read_parquet('/path/to/transactions.parquet') GROUP BY from_address ORDER BY sent_count DESC LIMIT 10", 905 | "SELECT to_address, SUM(value) as total_eth FROM read_parquet('/path/to/transactions.parquet') GROUP BY to_address ORDER BY total_eth DESC LIMIT 10" 906 | ], 907 | "blocks": [ 908 | "SELECT * FROM blocks LIMIT 10", 909 | "SELECT block_number, gas_used, transaction_count FROM blocks ORDER BY gas_used DESC LIMIT 10", 910 | "SELECT AVG(gas_used) as avg_gas, AVG(transaction_count) as avg_txs FROM blocks" 911 | ], 912 | "balances": [ 913 | "-- IMPORTANT: When querying the balances dataset, use the 'contract' parameter to specify the address", 914 | "-- First download the data:", 915 | "# result = query_dataset('balances', blocks='15M:15.01M', contract='0x1234...', output_format='parquet')", 916 | "-- Then query the data:", 917 | "SELECT block_number, address, balance_f64 FROM balances ORDER BY block_number", 918 | "SELECT block_number, balance_f64, balance_f64/1e18 as balance_eth FROM balances ORDER BY block_number" 919 | ], 920 | "logs": [ 921 | "SELECT * FROM logs LIMIT 10", 922 | "SELECT address, COUNT(*) as event_count FROM logs GROUP BY address ORDER BY event_count DESC LIMIT 10", 923 | "SELECT topic0, COUNT(*) as event_count FROM logs GROUP BY topic0 ORDER BY event_count DESC LIMIT 10" 924 | ], 925 | "joins": [ 926 | "-- Join with simple table references", 927 | "SELECT t.block_number, COUNT(*) as tx_count, b.gas_used FROM transactions t JOIN blocks b ON t.block_number = b.block_number GROUP BY t.block_number, b.gas_used ORDER BY tx_count DESC LIMIT 10", 928 | 929 | "-- Join with read_parquet (useful for complex joins)", 930 | "SELECT l.block_number, l.address, COUNT(*) as log_count FROM read_parquet('/path/to/logs.parquet') l GROUP BY l.block_number, l.address ORDER BY log_count DESC LIMIT 10" 931 | ], 932 | "workflow_examples": [ 933 | "-- Step 1: Download data with query_dataset", 934 | "# result = query_dataset(dataset='blocks', blocks='15000000:15000100', output_format='parquet')", 935 | "-- Step 2: Get schema info", 936 | "# schema = get_sql_table_schema(result['files'][0])", 937 | "-- Step 3: Run SQL query (simple table reference)", 938 | "# query_sql(query=\"SELECT * FROM blocks LIMIT 10\", files=result.get('files', []))", 939 | "", 940 | "-- Or use the combined function", 941 | "# query_blockchain_sql(sql_query=\"SELECT * FROM blocks LIMIT 10\", dataset='blocks', blocks='15000000:15000100')" 942 | ], 943 | 
"using_dataset_parameters": [ 944 | "-- IMPORTANT: How to check required parameters for datasets", 945 | "-- Step 1: Look up the dataset to see required parameters", 946 | "# dataset_info = lookup_dataset('balances')", 947 | "# This will show: 'required parameters: address'", 948 | "", 949 | "-- Step 2: Use the contract parameter for ANY address parameter", 950 | "# For balances dataset, query_dataset('balances', blocks='1M:1.1M', contract='0x1234...')", 951 | "# For erc20_transfers, query_dataset('erc20_transfers', blocks='1M:1.1M', contract='0x1234...')", 952 | "", 953 | "-- Step 3: Always check the dataset description and schema before querying new datasets", 954 | "# This helps ensure you're passing the correct parameters" 955 | ] 956 | } 957 | 958 | def parse_args(args=None): 959 | """Parse command line arguments""" 960 | parser = argparse.ArgumentParser(description="Cryo Data Server") 961 | parser.add_argument( 962 | "--rpc-url", 963 | type=str, 964 | help="Ethereum RPC URL to use for requests" 965 | ) 966 | parser.add_argument( 967 | "--data-dir", 968 | type=str, 969 | help="Directory to store downloaded data, defaults to ~/.cryo-mcp/data/" 970 | ) 971 | parser.add_argument( 972 | "--version", 973 | action="store_true", 974 | help="Show version information and exit" 975 | ) 976 | return parser.parse_args(args) 977 | 978 | def main(): 979 | """Main entry point for the command-line script""" 980 | args = parse_args() 981 | 982 | # Check if version was requested 983 | if args.version: 984 | from cryo_mcp import __version__ 985 | print(f"cryo-mcp version {__version__}") 986 | return 0 987 | 988 | # Set RPC URL with priority: command line > environment variable > default 989 | if args.rpc_url: 990 | rpc_url = args.rpc_url 991 | os.environ["ETH_RPC_URL"] = rpc_url 992 | print(f"Using RPC URL from command line: {rpc_url}") 993 | elif os.environ.get("ETH_RPC_URL"): 994 | rpc_url = os.environ["ETH_RPC_URL"] 995 | print(f"Using RPC URL from environment: {rpc_url}") 996 | else: 997 | rpc_url = DEFAULT_RPC_URL 998 | os.environ["ETH_RPC_URL"] = rpc_url 999 | print(f"Using default RPC URL: {rpc_url}") 1000 | 1001 | # Set data directory with priority: command line > environment variable > default 1002 | if args.data_dir: 1003 | data_dir = args.data_dir 1004 | os.environ["CRYO_DATA_DIR"] = data_dir 1005 | print(f"Using data directory from command line: {data_dir}") 1006 | elif os.environ.get("CRYO_DATA_DIR"): 1007 | data_dir = os.environ["CRYO_DATA_DIR"] 1008 | print(f"Using data directory from environment: {data_dir}") 1009 | else: 1010 | data_dir = DEFAULT_DATA_DIR 1011 | os.environ["CRYO_DATA_DIR"] = data_dir 1012 | print(f"Using default data directory: {data_dir}") 1013 | 1014 | # Ensure data directory exists 1015 | Path(data_dir).mkdir(parents=True, exist_ok=True) 1016 | 1017 | mcp.run() 1018 | 1019 | return 0 1020 | 1021 | if __name__ == "__main__": 1022 | sys.exit(main()) 1023 | -------------------------------------------------------------------------------- /cryo_mcp/sql.py: -------------------------------------------------------------------------------- 1 | """SQL query functionality for Cryo MCP using DuckDB.""" 2 | import os 3 | import re 4 | import json 5 | from pathlib import Path 6 | import duckdb 7 | from typing import Dict, Any, List, Optional, Union 8 | 9 | # Default SQL query timeout in seconds 10 | DEFAULT_QUERY_TIMEOUT = 30 11 | 12 | def get_data_directory() -> Path: 13 | """Get the data directory where Cryo files are stored.""" 14 | default_data_dir = str(Path.home() / 
".cryo-mcp" / "data") 15 | return Path(os.environ.get("CRYO_DATA_DIR", default_data_dir)) 16 | 17 | def create_connection(read_only: bool = False) -> duckdb.DuckDBPyConnection: 18 | """Create a DuckDB connection with appropriate settings.""" 19 | # In-memory database can't be read-only, so we always use read_only=False 20 | conn = duckdb.connect(database=":memory:", read_only=False) 21 | 22 | # Configure DuckDB settings for performance and safety 23 | conn.execute("SET memory_limit='4GB'") 24 | conn.execute("SET max_expression_depth=10000") 25 | 26 | # Note: query_timeout_ms setting might not be available in all DuckDB versions 27 | try: 28 | conn.execute(f"SET query_timeout_ms={DEFAULT_QUERY_TIMEOUT * 1000}") 29 | except Exception: 30 | pass # Ignore if setting doesn't exist 31 | 32 | return conn 33 | 34 | def list_available_tables() -> List[Dict[str, Any]]: 35 | """List all available tables from downloaded data files.""" 36 | data_dir = get_data_directory() 37 | 38 | # Find all parquet files in the data directory (including the latest subdirectory) 39 | parquet_files = list(data_dir.glob("**/*.parquet")) 40 | 41 | tables = [] 42 | for file_path in parquet_files: 43 | # Extract dataset name from filename 44 | name = file_path.stem.split("__")[0] 45 | if "__" in file_path.stem: 46 | name = file_path.stem.split("__")[0] 47 | else: 48 | # Try to extract from other naming patterns 49 | name_match = re.match(r'([a-z_]+)_', file_path.stem) 50 | if name_match: 51 | name = name_match.group(1) 52 | else: 53 | name = file_path.stem 54 | 55 | # Get file stats 56 | stats = file_path.stat() 57 | 58 | # Try to extract block range from filename 59 | block_range = "" 60 | blocks_match = re.search(r'blocks__(\d+)_to_(\d+)', str(file_path)) 61 | if blocks_match: 62 | block_range = f"{blocks_match.group(1)}:{blocks_match.group(2)}" 63 | 64 | tables.append({ 65 | "name": name, 66 | "path": str(file_path), 67 | "size_bytes": stats.st_size, 68 | "modified": stats.st_mtime, 69 | "block_range": block_range, 70 | "is_latest": "latest" in str(file_path) 71 | }) 72 | 73 | return tables 74 | 75 | def extract_dataset_from_sql(sql_query: str) -> Optional[str]: 76 | """ 77 | Try to extract the dataset name from an SQL query. 78 | 79 | This is a simple heuristic that looks for FROM clauses in the query. 80 | 81 | Args: 82 | sql_query: The SQL query to parse 83 | 84 | Returns: 85 | The extracted dataset name or None if it couldn't be determined 86 | """ 87 | # Simple regex to find table names after FROM or JOIN 88 | # This won't handle all SQL syntax but works for basic queries 89 | matches = re.findall(r'(?:FROM|JOIN)\s+([a-zA-Z_][a-zA-Z0-9_]*)', sql_query, re.IGNORECASE) 90 | 91 | if matches: 92 | # Return the first match that isn't a common SQL keyword 93 | for match in matches: 94 | if match.lower() not in ('where', 'select', 'group', 'order', 'having', 'limit', 'offset'): 95 | return match 96 | 97 | return None 98 | 99 | def execute_sql_query( 100 | query: str, 101 | files: Optional[List[str]] = None, 102 | include_schema: bool = True 103 | ) -> Dict[str, Any]: 104 | """ 105 | Execute a SQL query against specified parquet files. 106 | 107 | Args: 108 | query: SQL query to execute 109 | files: List of parquet file paths to query. If None, will use all files in the data directory. 
110 |         include_schema: Whether to include schema information in the result
111 | 
112 |     Returns:
113 |         Dictionary with query results and metadata
114 |     """
115 |     data_dir = get_data_directory()
116 |     conn = create_connection()
117 |     has_registered_views = False  # defined before the try block so the cleanup in `finally` never sees it unbound
118 |     try:
119 |         # Determine which parquet files to use
120 |         parquet_files = []
121 |         if files:
122 |             for file_path in files:
123 |                 path = Path(file_path)
124 |                 if path.exists() and path.suffix == '.parquet':
125 |                     parquet_files.append(path)
126 |                 else:
127 |                     print(f"Warning: File not found or not a parquet file: {file_path}")
128 |         else:
129 |             # If no files provided, use all parquet files in the data directory
130 |             parquet_files = list(data_dir.glob("**/*.parquet"))
131 | 
132 |         if not parquet_files:
133 |             return {
134 |                 "success": False,
135 |                 "error": "No parquet files available. Download data first with query_dataset."
136 |             }
137 | 
138 |         # Register temporary views for datasets if needed
139 |         # (has_registered_views was initialized above, before this try block)
140 |         try:
141 |             # Check if the query might be using direct table references without read_parquet()
142 |             potential_tables = extract_tables_from_sql(query)
143 | 
144 |             # Create views for potential table names that aren't using read_parquet
145 |             for table_name in potential_tables:
146 |                 if not ("read_parquet" in query.lower() and table_name.lower() in query.lower()):
147 |                     # Match files to table name more precisely
148 |                     # First, look for exact dataset name match (e.g., "blocks" in ethereum__blocks_*.parquet)
149 |                     dataset_pattern = f"__{table_name.lower()}__"
150 |                     exact_matches = [f for f in parquet_files if dataset_pattern in str(f).lower()]
151 | 
152 |                     # If no exact matches, try looser matching
153 |                     if not exact_matches:
154 |                         # Try matching at word boundaries to avoid partial matches
155 |                         matching_files = []
156 |                         for f in parquet_files:
157 |                             file_lower = str(f).lower()
158 |                             # Match dataset name patterns like ethereum__blocks_* or *_blocks_*
159 |                             if f"__{table_name.lower()}__" in file_lower or f"_{table_name.lower()}_" in file_lower:
160 |                                 matching_files.append(f)
161 |                             # Also match if it's just the table name at the start of the filename
162 |                             elif f"/{table_name.lower()}_" in file_lower or f"/{table_name.lower()}."
in file_lower: 163 | matching_files.append(f) 164 | else: 165 | matching_files = exact_matches 166 | 167 | if matching_files: 168 | # Create a combined view from all matching files 169 | conn.execute(f"DROP VIEW IF EXISTS {table_name}") 170 | 171 | if len(matching_files) == 1: 172 | # If only one file, create a simple view 173 | conn.execute(f"CREATE VIEW {table_name} AS SELECT * FROM '{matching_files[0]}'") 174 | print(f"Registered view '{table_name}' for file: {matching_files[0]}") 175 | else: 176 | # If multiple files, create a UNION ALL view to join all files 177 | union_query = " UNION ALL ".join([f"SELECT * FROM '{file}'" for file in matching_files]) 178 | conn.execute(f"CREATE VIEW {table_name} AS {union_query}") 179 | print(f"Registered view '{table_name}' for {len(matching_files)} files using UNION ALL") 180 | 181 | has_registered_views = True 182 | 183 | # Execute the query 184 | print(f"Executing SQL query: {query}") 185 | result = conn.execute(query).fetchdf() 186 | 187 | # Convert to records format for easier JSON serialization 188 | records = result.to_dict(orient="records") 189 | 190 | # Get schema information if requested 191 | schema_info = None 192 | if include_schema and not result.empty: 193 | schema_info = { 194 | "columns": list(result.columns), 195 | "dtypes": {col: str(dtype) for col, dtype in result.dtypes.items()} 196 | } 197 | 198 | # Track how the files were used 199 | file_usage = {} 200 | if has_registered_views: 201 | for table_name in extract_tables_from_sql(query): 202 | # Use the same matching logic as above 203 | dataset_pattern = f"__{table_name.lower()}__" 204 | exact_matches = [f for f in parquet_files if dataset_pattern in str(f).lower()] 205 | 206 | if not exact_matches: 207 | matching_files = [] 208 | for f in parquet_files: 209 | file_lower = str(f).lower() 210 | if f"__{table_name.lower()}__" in file_lower or f"_{table_name.lower()}_" in file_lower: 211 | matching_files.append(f) 212 | elif f"/{table_name.lower()}_" in file_lower or f"/{table_name.lower()}." 
in file_lower:
213 |                                 matching_files.append(f)
214 |                     else:
215 |                         matching_files = exact_matches
216 |                     if matching_files:
217 |                         file_usage[table_name] = {
218 |                             "files": [str(f) for f in matching_files],
219 |                             "combined": len(matching_files) > 1
220 |                         }
221 | 
222 |             return {
223 |                 "success": True,
224 |                 "result": records,
225 |                 "row_count": len(records),
226 |                 "schema": schema_info,
227 |                 "files_used": [str(f) for f in parquet_files],
228 |                 "used_direct_references": has_registered_views,
229 |                 "table_mappings": file_usage if file_usage else None
230 |             }
231 |         except Exception as e:
232 |             # Handle query-specific errors
233 |             error_msg = str(e)
234 |             print(f"SQL query error: {error_msg}")
235 |             return {
236 |                 "success": False,
237 |                 "error": error_msg,
238 |                 "files_available": [str(f) for f in parquet_files]
239 |             }
240 |     except Exception as e:
241 |         # Handle connection and setup errors
242 |         return {
243 |             "success": False,
244 |             "error": str(e)
245 |         }
246 |     finally:
247 |         # Clean up any registered views
248 |         if has_registered_views:
249 |             for table_name in extract_tables_from_sql(query):
250 |                 try:
251 |                     conn.execute(f"DROP VIEW IF EXISTS {table_name}")
252 |                 except Exception:
253 |                     pass
254 |         conn.close()
255 | 
256 | def extract_tables_from_sql(sql_query: str) -> List[str]:
257 |     """Extract table names from an SQL query that aren't using read_parquet."""
258 |     # This extends our extract_dataset_from_sql function for more general use;
259 |     # re is already imported at module level
260 | 
261 |     # Find potential table names after FROM or JOIN
262 |     matches = re.findall(r'(?:FROM|JOIN)\s+([a-zA-Z_][a-zA-Z0-9_]*)', sql_query, re.IGNORECASE)
263 | 
264 |     # Filter out common SQL keywords
265 |     sql_keywords = ('where', 'select', 'group', 'order', 'having', 'limit', 'offset')
266 |     return [match for match in matches if match.lower() not in sql_keywords]
267 | 
268 | def get_table_schema(file_path: str) -> Dict[str, Any]:
269 |     """
270 |     Get schema information for a parquet file.
271 | 272 | Args: 273 | file_path: Path to the parquet file 274 | 275 | Returns: 276 | Dictionary with schema information 277 | """ 278 | conn = create_connection() 279 | 280 | try: 281 | path = Path(file_path) 282 | if not path.exists() or path.suffix != '.parquet': 283 | return { 284 | "success": False, 285 | "error": f"File not found or not a parquet file: {file_path}" 286 | } 287 | 288 | # Register a temporary view for the file 289 | conn.execute(f"CREATE VIEW temp_view AS SELECT * FROM '{file_path}'") 290 | 291 | # Get schema info 292 | schema_result = conn.execute("SELECT column_name, data_type FROM information_schema.columns WHERE table_name='temp_view'").fetchdf() 293 | 294 | # Get sample data 295 | sample_data = conn.execute("SELECT * FROM temp_view LIMIT 5").fetchdf() 296 | 297 | # Get row count (might be expensive for large files) 298 | row_count = conn.execute("SELECT COUNT(*) as count FROM temp_view").fetchone()[0] 299 | 300 | return { 301 | "success": True, 302 | "file_path": file_path, 303 | "columns": schema_result.to_dict(orient="records"), 304 | "sample_data": sample_data.to_dict(orient="records"), 305 | "row_count": row_count 306 | } 307 | except Exception as e: 308 | return { 309 | "success": False, 310 | "error": str(e) 311 | } 312 | finally: 313 | conn.close() -------------------------------------------------------------------------------- /ethereum__blocks__00001000_to_00001004.json: -------------------------------------------------------------------------------- 1 | [{"block_hash":"0x5b4590a9905fa1c9cc273f32e6dc63b4c512f0ee14edc6fa41c26b416a7b5d58","author":"0xbb7b8287f3f0a933474a79eae42cbca977791171","block_number":1000,"gas_used":0,"extra_data":"0x476574682f4c5649562f76312e302e302f6c696e75782f676f312e342e32","timestamp":1438272138,"base_fee_per_gas":null,"chain_id":1},{"block_hash":"0x48acba3928780f40b61ca7f0614448847b2af9b35b985e60054f7bb41b36b1cd","author":"0xbb7b8287f3f0a933474a79eae42cbca977791171","block_number":1001,"gas_used":0,"extra_data":"0x476574682f4c5649562f76312e302e302f6c696e75782f676f312e342e32","timestamp":1438272139,"base_fee_per_gas":null,"chain_id":1},{"block_hash":"0x15b90b909a3c844b8e0ca76302027619ac56c4750dcddfc83cb78c8cbdba4b28","author":"0xbb7b8287f3f0a933474a79eae42cbca977791171","block_number":1002,"gas_used":0,"extra_data":"0x476574682f4c5649562f76312e302e302f6c696e75782f676f312e342e32","timestamp":1438272140,"base_fee_per_gas":null,"chain_id":1},{"block_hash":"0x8b83375cdc6a3490595b1cde985a810bea9bdb6df601c4f07719629a59ab520d","author":"0xbb7b8287f3f0a933474a79eae42cbca977791171","block_number":1003,"gas_used":0,"extra_data":"0x476574682f4c5649562f76312e302e302f6c696e75782f676f312e342e32","timestamp":1438272141,"base_fee_per_gas":null,"chain_id":1},{"block_hash":"0x3aa173397b610df7f96ad29f76a3868890b5d6ac09fdf139bd5a7a57360f89c2","author":"0xa1623430350c5df1b52b0b57483a5bb45d1796da","block_number":1004,"gas_used":0,"extra_data":"0x476574682f76312e302e302d30636463373634372f6c696e75782f676f312e34","timestamp":1438272142,"base_fee_per_gas":null,"chain_id":1}] -------------------------------------------------------------------------------- /ethereum__blocks__22005903_to_22005907.json: -------------------------------------------------------------------------------- 1 | 
[{"block_hash":"0x32a93425c3c7c4df6a2951b6b672de7adfb2ce4b194ac0fdfb701c6b90881f68","author":"0x95222290dd7278aa3ddd389cc1e1d165cc4bafe5","block_number":22005903,"gas_used":11416700,"extra_data":"0x6265617665726275696c642e6f7267","timestamp":1741482167,"base_fee_per_gas":548088168,"chain_id":1},{"block_hash":"0xe053107b091e750749a4fb35d8f62455359fa92cd956bb1b2ddc8ceffade1771","author":"0x4838b106fce9647bdf1e7877bf73ce8b0bad5f97","block_number":22005904,"gas_used":17920506,"extra_data":"0x546974616e2028746974616e6275696c6465722e78797a29","timestamp":1741482179,"base_fee_per_gas":523073542,"chain_id":1},{"block_hash":"0x52d2a60f9d815988acb9f84af41d71bd98f254544403de330c22355f62cd1d72","author":"0x95222290dd7278aa3ddd389cc1e1d165cc4bafe5","block_number":22005905,"gas_used":30175069,"extra_data":"0x6265617665726275696c642e6f7267","timestamp":1741482191,"base_fee_per_gas":522784911,"chain_id":1},{"block_hash":"0xf58e243788d10806928e2ce1893abe14a5d1a4c7a69f2dba0b608a0972b82e77","author":"0x95222290dd7278aa3ddd389cc1e1d165cc4bafe5","block_number":22005906,"gas_used":12427766,"extra_data":"0x6265617665726275696c642e6f7267","timestamp":1741482203,"base_fee_per_gas":567093198,"chain_id":1},{"block_hash":"0x8ebdea19b52e3585a3a476d4eea02033c1c702072eeb59461a8442d84bbb2ac8","author":"0x95222290dd7278aa3ddd389cc1e1d165cc4bafe5","block_number":22005907,"gas_used":28509755,"extra_data":"0x6265617665726275696c642e6f7267","timestamp":1741482215,"base_fee_per_gas":545149065,"chain_id":1}] -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "cryo-mcp" 3 | version = "0.1.4" 4 | description = "MCP server for querying Ethereum blockchain data using cryo" 5 | readme = "README.md" 6 | license = {file = "LICENSE"} 7 | requires-python = ">=3.10" 8 | authors = [ 9 | {name = "z80", email = "z80@ophy.xyz"} 10 | ] 11 | 12 | keywords = ["ethereum", "blockchain", "cryo", "mcp", "api", "server"] 13 | classifiers = [ 14 | "Development Status :: 4 - Beta", 15 | "Intended Audience :: Developers", 16 | "License :: OSI Approved :: MIT License", 17 | "Programming Language :: Python :: 3", 18 | "Programming Language :: Python :: 3.8", 19 | "Programming Language :: Python :: 3.9", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Topic :: Software Development :: Libraries :: Python Modules", 24 | ] 25 | dependencies = [ 26 | "duckdb>=1.2.1", 27 | "mcp>=1.3.0", 28 | "numpy>=2.2.3", 29 | "pandas>=2.2.3", 30 | "pyarrow>=19.0.1", 31 | "requests>=2.28.0", 32 | ] 33 | 34 | [project.optional-dependencies] 35 | dev = [ 36 | "pytest>=7.0.0", 37 | "black>=23.0.0", 38 | "isort>=5.10.0", 39 | "mypy>=1.0.0", 40 | ] 41 | 42 | [project.urls] 43 | "Homepage" = "https://github.com/z80dev/cryo-mcp" 44 | "Bug Tracker" = "https://github.com/z80dev/cryo-mcp/issues" 45 | 46 | [project.scripts] 47 | cryo-mcp = "cryo_mcp.server:main" 48 | 49 | 50 | [build-system] 51 | requires = ["hatchling"] 52 | build-backend = "hatchling.build" 53 | 54 | [tool.black] 55 | line-length = 88 56 | 57 | [tool.isort] 58 | profile = "black" 59 | line_length = 88 60 | 61 | [tool.mypy] 62 | python_version = "3.8" 63 | warn_return_any = true 64 | warn_unused_configs = true 65 | 66 | [dependency-groups] 67 | dev = [ 68 | "ipython>=8.34.0", 69 | "pytest>=8.3.5", 70 | ] 71 | 
-------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # tests for cryo-mcp -------------------------------------------------------------------------------- /tests/data/ethereum__blocks__00001000_to_00001004.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/z80dev/cryo-mcp/5c831d9af138d73d8345d2c994536fb4ce22c184/tests/data/ethereum__blocks__00001000_to_00001004.parquet -------------------------------------------------------------------------------- /tests/test_blocks.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import os 4 | import subprocess 5 | from pathlib import Path 6 | 7 | # Set ETH_RPC_URL 8 | os.environ["ETH_RPC_URL"] = "http://10.0.0.48:8545" 9 | print(f"Using ETH_RPC_URL: {os.environ['ETH_RPC_URL']}") 10 | 11 | def test_integer_blocks(): 12 | """Test using integer blocks with cryo directly""" 13 | 14 | # Convert integer block range to string 15 | start_block = 1000 16 | end_block = 1005 17 | block_range = f"{start_block}:{end_block}" 18 | 19 | cmd = ["cryo", "blocks", "-b", block_range, "-r", "http://10.0.0.48:8545", "--json"] 20 | 21 | print(f"Running command: {' '.join(cmd)}") 22 | result = subprocess.run(cmd, capture_output=True, text=True) 23 | 24 | print(f"Return code: {result.returncode}") 25 | print(f"STDOUT: {result.stdout[:500]}...") 26 | 27 | # Now run the equivalent using our server's string conversion logic 28 | temp_dir = Path("/tmp/cryo_int_test") 29 | temp_dir.mkdir(exist_ok=True) 30 | 31 | cmd = ["cryo", "blocks", "-b", block_range, "-r", "http://10.0.0.48:8545", "--json", "-o", str(temp_dir)] 32 | 33 | print(f"\nRunning output command: {' '.join(cmd)}") 34 | result = subprocess.run(cmd, capture_output=True, text=True) 35 | 36 | print(f"Return code: {result.returncode}") 37 | print(f"STDOUT: {result.stdout[:500]}...") 38 | 39 | # Find and read the output file 40 | output_files = list(temp_dir.glob("*blocks*.json")) 41 | print(f"Output files: {output_files}") 42 | 43 | if output_files: 44 | with open(output_files[0], 'r') as f: 45 | data = json.load(f) 46 | print(f"Number of records: {len(data)}") 47 | print(f"First block number: {data[0]['block_number']}") 48 | print(f"Last block number: {data[-1]['block_number']}") 49 | 50 | # Verify that we got the block range we asked for 51 | # Note: cryo seems to use start:end as [start, end) (inclusive start, exclusive end) 52 | expected_blocks = list(range(start_block, end_block)) 53 | actual_blocks = [block["block_number"] for block in data] 54 | 55 | print(f"Expected blocks: {expected_blocks}") 56 | print(f"Actual blocks: {actual_blocks}") 57 | 58 | if sorted(actual_blocks) == sorted(expected_blocks): 59 | print("✅ Block ranges match!") 60 | else: 61 | print("❌ Block ranges do not match!") 62 | 63 | if __name__ == "__main__": 64 | test_integer_blocks() -------------------------------------------------------------------------------- /tests/test_contract_transactions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import os 4 | import subprocess 5 | from pathlib import Path 6 | 7 | # Set ETH_RPC_URL 8 | os.environ["ETH_RPC_URL"] = "http://10.0.0.48:8545" 9 | print(f"Using ETH_RPC_URL: {os.environ['ETH_RPC_URL']}") 10 | 11 | def 
test_contract_transactions(): 12 | """Test fetching transactions for a specific contract""" 13 | 14 | # Use a known block number 15 | block_num = 22001067 # You can replace this with any block number you want to test with 16 | block_range = f"{block_num}:{block_num+1}" 17 | 18 | # Use a known contract address (USDC for example) 19 | contract_address = "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48" 20 | 21 | # Create a temp directory for output 22 | temp_dir = Path("/tmp/cryo_contract_tx_test") 23 | temp_dir.mkdir(exist_ok=True) 24 | 25 | cmd = [ 26 | "cryo", "transactions", 27 | "-b", block_range, 28 | "--contract", contract_address, 29 | "-r", os.environ["ETH_RPC_URL"], 30 | "--json", 31 | "-o", str(temp_dir) 32 | ] 33 | 34 | print(f"Running command: {' '.join(cmd)}") 35 | result = subprocess.run(cmd, capture_output=True, text=True) 36 | 37 | print(f"Return code: {result.returncode}") 38 | print(f"STDOUT: {result.stdout[:500]}...") 39 | 40 | # Find and read the output file 41 | output_files = list(temp_dir.glob("*transactions*.json")) 42 | print(f"Output files: {output_files}") 43 | 44 | if output_files: 45 | with open(output_files[0], 'r') as f: 46 | data = json.load(f) 47 | 48 | print(f"Number of contract transactions: {len(data)}") 49 | if data: 50 | print(f"First transaction hash: {data[0].get('transaction_hash')}") 51 | print(f"First transaction block number: {data[0].get('block_number')}") 52 | 53 | # Verify contract interactions 54 | for tx in data: 55 | if tx.get("to_address") == contract_address: 56 | print(f"Found transaction to contract: {tx.get('transaction_hash')}") 57 | elif tx.get("from_address") == contract_address: 58 | print(f"Found transaction from contract: {tx.get('transaction_hash')}") 59 | 60 | return data 61 | 62 | return None 63 | 64 | if __name__ == "__main__": 65 | test_contract_transactions() -------------------------------------------------------------------------------- /tests/test_cryo.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import os 4 | import subprocess 5 | from pathlib import Path 6 | 7 | # Set ETH_RPC_URL 8 | os.environ["ETH_RPC_URL"] = "http://10.0.0.48:8545" 9 | print(f"Using ETH_RPC_URL: {os.environ['ETH_RPC_URL']}") 10 | 11 | def test_cryo_cli(): 12 | """Test direct CLI command to verify it works""" 13 | dataset = "blocks" 14 | cmd = ["cryo", dataset, "-b", "1000:1005", "-r", "http://10.0.0.48:8545", "--json"] 15 | 16 | print(f"Running command: {' '.join(cmd)}") 17 | result = subprocess.run(cmd, capture_output=True, text=True) 18 | 19 | print(f"Return code: {result.returncode}") 20 | print(f"STDOUT: {result.stdout[:500]}...") 21 | print(f"STDERR: {result.stderr[:500]}...") 22 | 23 | if result.returncode != 0: 24 | print("CLI command failed") 25 | return False 26 | 27 | return True 28 | 29 | def test_cryo_with_output(): 30 | """Test with output directory as we do in the server""" 31 | dataset = "blocks" 32 | temp_dir = Path("/tmp/cryo_test") 33 | temp_dir.mkdir(exist_ok=True) 34 | 35 | cmd = ["cryo", dataset, "-b", "1000:1005", "-r", "http://10.0.0.48:8545", "--json", "-o", str(temp_dir)] 36 | 37 | print(f"Running command: {' '.join(cmd)}") 38 | result = subprocess.run(cmd, capture_output=True, text=True) 39 | 40 | print(f"Return code: {result.returncode}") 41 | print(f"STDOUT: {result.stdout[:500]}...") 42 | print(f"STDERR: {result.stderr[:500]}...") 43 | 44 | if result.returncode != 0: 45 | print("CLI command with output failed") 46 | return False 47 | 48 | # 
Find the output file 49 | output_files = list(temp_dir.glob(f"*{dataset}*.json")) 50 | print(f"Output files: {output_files}") 51 | 52 | if not output_files: 53 | print("No output files found") 54 | return False 55 | 56 | # Read the first file 57 | with open(output_files[0], 'r') as f: 58 | data = json.load(f) 59 | print(f"Data sample: {json.dumps(data[:2], indent=2)}") 60 | 61 | return True 62 | 63 | if __name__ == "__main__": 64 | print("=== Testing direct CLI command ===") 65 | cli_result = test_cryo_cli() 66 | 67 | print("\n=== Testing CLI command with output directory ===") 68 | output_result = test_cryo_with_output() 69 | 70 | if cli_result and output_result: 71 | print("\n✅ All tests passed") 72 | else: 73 | print("\n❌ Tests failed") -------------------------------------------------------------------------------- /tests/test_latest_block.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import os 4 | import subprocess 5 | import requests 6 | from pathlib import Path 7 | 8 | # Set ETH_RPC_URL 9 | RPC_URL = "http://10.0.0.48:8545" 10 | os.environ["ETH_RPC_URL"] = RPC_URL 11 | print(f"Using ETH_RPC_URL: {os.environ['ETH_RPC_URL']}") 12 | 13 | def get_latest_block_number(): 14 | """Get the latest block number from the Ethereum node""" 15 | payload = { 16 | "jsonrpc": "2.0", 17 | "method": "eth_blockNumber", 18 | "params": [], 19 | "id": 1 20 | } 21 | 22 | try: 23 | response = requests.post(RPC_URL, json=payload) 24 | response_data = response.json() 25 | 26 | if 'result' in response_data: 27 | # Convert hex to int 28 | latest_block = int(response_data['result'], 16) 29 | print(f"Latest block number: {latest_block}") 30 | return latest_block 31 | else: 32 | print(f"Error: {response_data.get('error', 'Unknown error')}") 33 | return None 34 | except Exception as e: 35 | print(f"Exception when fetching latest block: {e}") 36 | return None 37 | 38 | def test_blocks_range(): 39 | """Test querying a specific block range""" 40 | # Use fixed block range for testing 41 | start_block = 22005903 42 | end_block = 22005908 43 | 44 | block_range = f"{start_block}:{end_block}" 45 | 46 | cmd = ["cryo", "blocks", "-b", block_range, "-r", RPC_URL, "--json"] 47 | 48 | print(f"Running command: {' '.join(cmd)}") 49 | result = subprocess.run(cmd, capture_output=True, text=True) 50 | 51 | print(f"Return code: {result.returncode}") 52 | print(f"STDOUT: {result.stdout[:500]}...") 53 | 54 | if result.returncode != 0: 55 | print(f"STDERR: {result.stderr}") 56 | assert False, "Command failed" 57 | 58 | assert True 59 | 60 | def test_latest_blocks(): 61 | """Test getting the latest blocks""" 62 | latest_block = get_latest_block_number() 63 | 64 | if latest_block is None: 65 | print("Failed to get the latest block number") 66 | assert False, "Failed to get the latest block number" 67 | 68 | # Test getting the latest 5 blocks 69 | start_block = latest_block - 5 70 | print(f"Fetching blocks from {start_block} to {latest_block}") 71 | 72 | # Direct implementation rather than calling test_blocks_range 73 | block_range = f"{start_block}:{latest_block+1}" # Add 1 to make it inclusive 74 | 75 | cmd = ["cryo", "blocks", "-b", block_range, "-r", RPC_URL, "--json"] 76 | 77 | print(f"Running command: {' '.join(cmd)}") 78 | result = subprocess.run(cmd, capture_output=True, text=True) 79 | 80 | print(f"Return code: {result.returncode}") 81 | print(f"STDOUT: {result.stdout[:500]}...") 82 | 83 | if result.returncode != 0: 84 | print(f"STDERR: 
{result.stderr}") 85 | assert False, "Failed to fetch the latest blocks" 86 | 87 | assert True 88 | 89 | if __name__ == "__main__": 90 | test_latest_blocks() -------------------------------------------------------------------------------- /tests/test_latest_functions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import os 4 | import subprocess 5 | import requests 6 | from pathlib import Path 7 | 8 | # Define the function directly in the test script 9 | def get_latest_block_number(): 10 | """Get the latest block number from the Ethereum node""" 11 | rpc_url = os.environ.get("ETH_RPC_URL", "http://10.0.0.48:8545") 12 | 13 | payload = { 14 | "jsonrpc": "2.0", 15 | "method": "eth_blockNumber", 16 | "params": [], 17 | "id": 1 18 | } 19 | 20 | try: 21 | response = requests.post(rpc_url, json=payload) 22 | response_data = response.json() 23 | 24 | if 'result' in response_data: 25 | # Convert hex to int 26 | latest_block = int(response_data['result'], 16) 27 | print(f"Latest block number: {latest_block}") 28 | return latest_block 29 | else: 30 | print(f"Error: {response_data.get('error', 'Unknown error')}") 31 | return None 32 | except Exception as e: 33 | print(f"Exception when fetching latest block: {e}") 34 | return None 35 | 36 | def test_latest_block_functions(): 37 | """Test the direct latest block functions""" 38 | 39 | print("=== Testing get_latest_block_number() ===") 40 | latest_block = get_latest_block_number() 41 | print(f"Latest block number: {latest_block}") 42 | 43 | print("\n=== Testing get_latest_ethereum_block with cryo ===") 44 | 45 | # Test getting the latest block using cryo directly 46 | if latest_block: 47 | rpc_url = os.environ.get("ETH_RPC_URL", "http://10.0.0.48:8545") 48 | block_range = f"{latest_block}:{latest_block+1}" 49 | 50 | temp_dir = Path("/tmp/cryo_latest_test") 51 | temp_dir.mkdir(exist_ok=True) 52 | 53 | cmd = ["cryo", "blocks", "-b", block_range, "-r", rpc_url, "--json", "-o", str(temp_dir)] 54 | 55 | print(f"Running command: {' '.join(cmd)}") 56 | result = subprocess.run(cmd, capture_output=True, text=True) 57 | 58 | if result.returncode != 0: 59 | print(f"Error: {result.stderr}") 60 | return False 61 | 62 | # Find the output file 63 | output_files = list(temp_dir.glob("*blocks*.json")) 64 | 65 | if not output_files: 66 | print("No output files found") 67 | return False 68 | 69 | # Read the block data 70 | with open(output_files[0], 'r') as f: 71 | data = json.load(f) 72 | if data and len(data) > 0: 73 | print(f"Block data: {json.dumps(data[0], indent=2)}") 74 | return True 75 | 76 | return False 77 | 78 | def test_query_latest_blocks(): 79 | """Test querying the latest blocks using subprocess""" 80 | 81 | # Get the latest block number 82 | latest_block = get_latest_block_number() 83 | if latest_block is None: 84 | print("Failed to get latest block number") 85 | return False 86 | 87 | # Test getting a range of latest blocks 88 | start_block = latest_block - 5 89 | end_block = latest_block 90 | 91 | # Create a block range string 92 | block_range = f"{start_block}:{end_block+1}" # Add 1 to make it inclusive 93 | 94 | # Use cryo directly 95 | rpc_url = os.environ.get("ETH_RPC_URL", "http://10.0.0.48:8545") 96 | temp_dir = Path("/tmp/cryo_test_latest") 97 | temp_dir.mkdir(exist_ok=True) 98 | 99 | cmd = [ 100 | "cryo", "blocks", 101 | "-b", block_range, 102 | "-r", rpc_url, 103 | "--json", 104 | "-o", str(temp_dir) 105 | ] 106 | 107 | print(f"Running command: {' '.join(cmd)}") 
108 | result = subprocess.run(cmd, capture_output=True, text=True) 109 | 110 | if result.returncode != 0: 111 | print(f"Error: {result.stderr}") 112 | return False 113 | 114 | # Find the output file 115 | output_files = list(temp_dir.glob("*blocks*.json")) 116 | 117 | if not output_files: 118 | print("No output files found") 119 | return False 120 | 121 | # Read the first file 122 | with open(output_files[0], 'r') as f: 123 | data = json.load(f) 124 | print(f"Found {len(data)} blocks") 125 | 126 | # Check if we got the range we expected 127 | block_numbers = [block["block_number"] for block in data] 128 | print(f"Block numbers: {block_numbers}") 129 | 130 | # Check the range covers what we requested (inclusive start to end) 131 | expected_blocks = list(range(start_block, end_block + 1)) 132 | actual_blocks = sorted(block_numbers) 133 | 134 | print(f"Expected blocks: {expected_blocks}") 135 | print(f"Actual blocks: {actual_blocks}") 136 | 137 | return set(expected_blocks) == set(actual_blocks) 138 | 139 | if __name__ == "__main__": 140 | print("Testing latest block functions") 141 | 142 | # Test direct functions 143 | functions_success = test_latest_block_functions() 144 | 145 | # Test querying latest blocks 146 | query_success = test_query_latest_blocks() 147 | 148 | if functions_success and query_success: 149 | print("\n✅ All tests passed!") 150 | else: 151 | print("\n❌ Tests failed") 152 | if not functions_success: 153 | print("- Latest block functions test failed") 154 | if not query_success: 155 | print("- Query latest blocks test failed") -------------------------------------------------------------------------------- /tests/test_mcp_functions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | from cryo_mcp.server import ( 5 | get_latest_ethereum_block, 6 | list_datasets, 7 | query_dataset, 8 | lookup_dataset 9 | ) 10 | 11 | # Set ETH_RPC_URL 12 | os.environ["ETH_RPC_URL"] = "http://10.0.0.48:8545" 13 | print(f"Using ETH_RPC_URL: {os.environ['ETH_RPC_URL']}") 14 | 15 | def test_get_latest_block(): 16 | """Test the get_latest_ethereum_block function""" 17 | print("\n=== Testing get_latest_ethereum_block ===") 18 | 19 | block_info = get_latest_ethereum_block() 20 | print(f"Latest block: {block_info}") 21 | 22 | if "error" in block_info: 23 | print(f"❌ Error getting latest block: {block_info['error']}") 24 | return False 25 | 26 | print(f"✅ Successfully got latest block: {block_info['block_number']}") 27 | return True 28 | 29 | def test_list_datasets(): 30 | """Test the list_datasets function""" 31 | print("\n=== Testing list_datasets ===") 32 | 33 | datasets = list_datasets() 34 | print(f"Found {len(datasets)} datasets: {', '.join(datasets[:5])}...") 35 | 36 | # Check that we have some common datasets 37 | required_datasets = ["blocks", "transactions", "logs", "balances"] 38 | missing = [ds for ds in required_datasets if ds not in datasets] 39 | 40 | if missing: 41 | print(f"❌ Missing required datasets: {', '.join(missing)}") 42 | return False 43 | 44 | print(f"✅ Successfully listed {len(datasets)} datasets") 45 | return True 46 | 47 | def test_query_dataset(): 48 | """Test the query_dataset function""" 49 | print("\n=== Testing query_dataset ===") 50 | 51 | # Test transactions with latest block 52 | result = query_dataset( 53 | dataset="transactions", 54 | use_latest=True, 55 | output_format="json" 56 | ) 57 | 58 | if "error" in result: 59 | print(f"❌ Error querying transactions: 
{result['error']}") 60 | return False 61 | 62 | data = result.get("data", []) 63 | print(f"Got {len(data)} transactions from latest block") 64 | 65 | if not data: 66 | print("❌ No transactions returned") 67 | return False 68 | 69 | # Test transactions with block range and contract filter 70 | contract_address = "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48" # USDC 71 | result = query_dataset( 72 | dataset="transactions", 73 | blocks="22001067:22001068", 74 | contract=contract_address, 75 | output_format="json" 76 | ) 77 | 78 | if "error" in result: 79 | print(f"❌ Error querying contract transactions: {result['error']}") 80 | return False 81 | 82 | data = result.get("data", []) 83 | print(f"Got {len(data)} USDC transactions from block 22001067") 84 | 85 | contract_txs = [tx for tx in data if tx.get("to_address") == contract_address] 86 | print(f"Found {len(contract_txs)} transactions to USDC") 87 | 88 | print(f"✅ Successfully queried dataset with different parameters") 89 | return True 90 | 91 | def test_lookup_dataset(): 92 | """Test the lookup_dataset function""" 93 | print("\n=== Testing lookup_dataset ===") 94 | 95 | # Look up transactions dataset 96 | result = lookup_dataset( 97 | name="transactions", 98 | use_latest_sample=True 99 | ) 100 | 101 | if "schema_error" in result and "sample_error" not in result: 102 | print(f"❓ Schema error but sample OK: {result['schema_error']}") 103 | elif "sample_error" in result: 104 | print(f"❓ Sample error: {result['sample_error']}") 105 | 106 | print(f"Dataset info: {result['name']}") 107 | print(f"Example queries: {result['example_queries']}") 108 | 109 | # Check that we got some schema information 110 | if "schema" in result or "schema_error" in result: 111 | print("✅ Got schema information (or error)") 112 | else: 113 | print("❌ Missing schema information") 114 | return False 115 | 116 | print(f"✅ Successfully looked up dataset information") 117 | return True 118 | 119 | def main(): 120 | """Run all tests""" 121 | tests = [ 122 | test_get_latest_block, 123 | test_list_datasets, 124 | test_query_dataset, 125 | test_lookup_dataset 126 | ] 127 | 128 | results = [] 129 | for test in tests: 130 | results.append(test()) 131 | 132 | print("\n=== Test Summary ===") 133 | print(f"Passed: {results.count(True)}/{len(results)}") 134 | 135 | return 0 if all(results) else 1 136 | 137 | if __name__ == "__main__": 138 | sys.exit(main()) -------------------------------------------------------------------------------- /tests/test_sql.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import json 4 | import tempfile 5 | from pathlib import Path 6 | import unittest 7 | import subprocess 8 | from unittest.mock import patch, MagicMock 9 | import shutil 10 | 11 | # Add parent directory to path to import modules 12 | import sys 13 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 14 | 15 | from cryo_mcp.sql import execute_sql_query, list_available_tables, create_connection, extract_dataset_from_sql 16 | from cryo_mcp.server import query_blockchain_sql, query_dataset 17 | 18 | # Constants for test data 19 | TEST_DATA_DIR = Path(__file__).parent / "data" 20 | TEST_BLOCK_RANGE = "1000:1005" # Use a small block range for testing 21 | 22 | class TestSQL(unittest.TestCase): 23 | """Test cases for SQL functionality""" 24 | 25 | @classmethod 26 | def setUpClass(cls): 27 | """Setup for all tests - download real blockchain data once""" 28 | # Create test data directory if it 
doesn't exist 29 | TEST_DATA_DIR.mkdir(exist_ok=True) 30 | 31 | # Setup ETH_RPC_URL environment variable if not set 32 | if not os.environ.get("ETH_RPC_URL"): 33 | os.environ["ETH_RPC_URL"] = "http://localhost:8545" 34 | 35 | # We don't need to download data here anymore as we've manually downloaded it 36 | # via direct shell command 37 | 38 | def setUp(self): 39 | """Setup for each test""" 40 | # Create a temporary directory for test data 41 | self.temp_dir = tempfile.TemporaryDirectory() 42 | self.data_dir = Path(self.temp_dir.name) 43 | 44 | # Set environment variable for data directory 45 | os.environ["CRYO_DATA_DIR"] = str(self.data_dir) 46 | 47 | # Create latest directory 48 | self.latest_dir = self.data_dir / "latest" 49 | self.latest_dir.mkdir(exist_ok=True) 50 | 51 | # Copy real parquet files from TEST_DATA_DIR to temp dir if they exist 52 | self.has_real_data = False 53 | parquet_files = list(TEST_DATA_DIR.glob("*.parquet")) 54 | if parquet_files: 55 | for file in parquet_files: 56 | shutil.copy(file, self.data_dir) 57 | self.has_real_data = True 58 | print(f"Using real blockchain data for tests: {[f.name for f in parquet_files]}") 59 | 60 | def tearDown(self): 61 | """Clean up temporary directory""" 62 | self.temp_dir.cleanup() 63 | 64 | def create_mock_parquet_file(self, dataset_name, is_latest=False): 65 | """Create a mock parquet file for testing""" 66 | # If we already have real data, don't need to create mock data 67 | if self.has_real_data and dataset_name == "blocks": 68 | return next(self.data_dir.glob("*blocks*.parquet")) 69 | 70 | # Determine the directory based on whether it's a latest file 71 | directory = self.latest_dir if is_latest else self.data_dir 72 | 73 | # Create a mock parquet file (doesn't need to be a real parquet file for our tests) 74 | file_path = directory / f"{dataset_name}__00001000_to_00001010.parquet" 75 | with open(file_path, 'w') as f: 76 | f.write("mock parquet data") 77 | 78 | return file_path 79 | 80 | @patch('cryo_mcp.sql.duckdb.connect') 81 | def test_extract_dataset_from_sql(self, mock_connect): 82 | """Test extracting dataset names from SQL queries""" 83 | test_cases = [ 84 | {"query": "SELECT * FROM blocks LIMIT 10", "expected": "blocks"}, 85 | {"query": "SELECT block_number FROM transactions WHERE value > 0", "expected": "transactions"}, 86 | {"query": "SELECT logs.address FROM logs", "expected": "logs"}, 87 | {"query": "SELECT t.hash FROM transactions t JOIN blocks b", "expected": "transactions"}, 88 | {"query": "SELECT * FROM WHERE x = 1", "expected": None}, # Invalid SQL 89 | {"query": "SELECT * FROM", "expected": None}, # Invalid SQL 90 | ] 91 | 92 | for case in test_cases: 93 | result = extract_dataset_from_sql(case["query"]) 94 | self.assertEqual(result, case["expected"], f"Failed for query: {case['query']}") 95 | 96 | def test_list_available_tables(self): 97 | """Test listing available tables with real data""" 98 | # If we don't have real data, we need to create mock files 99 | if not self.has_real_data: 100 | self.create_mock_parquet_file("blocks") 101 | self.create_mock_parquet_file("transactions", is_latest=True) 102 | else: 103 | # With real data, we should already have a blocks table 104 | pass 105 | 106 | # Get tables 107 | tables = list_available_tables() 108 | 109 | # Check that we have at least one table 110 | self.assertTrue(len(tables) > 0, "Should find at least one table") 111 | 112 | # With real data, verify that our known table is found 113 | if self.has_real_data: 114 | # There should be at least one table with 
'ethereum' in the name 115 | ethereum_tables = [table for table in tables if 'ethereum' in table["path"]] 116 | self.assertTrue(len(ethereum_tables) > 0, "Should find ethereum tables") 117 | 118 | @patch('cryo_mcp.server.query_dataset') 119 | @patch('cryo_mcp.sql.execute_sql_query') 120 | def test_query_blockchain_sql(self, mock_execute_sql, mock_query_dataset): 121 | """Test the combined blockchain SQL query function""" 122 | # Mock query_dataset to return a successful result 123 | mock_query_dataset.return_value = { 124 | "files": ["/path/to/blocks__1000_to_1010.parquet"], 125 | "count": 1, 126 | "format": "parquet" 127 | } 128 | 129 | # Mock execute_sql_query to return a successful result 130 | mock_execute_sql.return_value = { 131 | "success": True, 132 | "result": [{"block_number": 1000, "gas_used": 1000000}], 133 | "row_count": 1, 134 | "schema": {"columns": ["block_number", "gas_used"]}, 135 | "files_used": ["/path/to/blocks__1000_to_1010.parquet"] 136 | } 137 | 138 | # Call query_blockchain_sql 139 | result = query_blockchain_sql( 140 | sql_query="SELECT block_number, gas_used FROM '/path/to/blocks__1000_to_1010.parquet' LIMIT 1", 141 | dataset="blocks", 142 | blocks="1000:1010" 143 | ) 144 | 145 | # Check results 146 | self.assertTrue(result["success"], "Query should succeed") 147 | 148 | # Verify that query_dataset was called with correct parameters 149 | mock_query_dataset.assert_called_once_with( 150 | dataset="blocks", 151 | blocks="1000:1010", 152 | start_block=None, 153 | end_block=None, 154 | use_latest=False, 155 | blocks_from_latest=None, 156 | contract=None, 157 | output_format="parquet" 158 | ) 159 | 160 | # Verify that execute_sql_query was called with correct parameters 161 | mock_execute_sql.assert_called_once_with( 162 | "SELECT block_number, gas_used FROM '/path/to/blocks__1000_to_1010.parquet' LIMIT 1", 163 | ["/path/to/blocks__1000_to_1010.parquet"], # files parameter 164 | True # include_schema parameter 165 | ) 166 | 167 | def test_execute_sql_query_with_nonexistent_file(self): 168 | """Test executing SQL query with a nonexistent file""" 169 | # Call execute_sql_query with a file that doesn't exist 170 | result = execute_sql_query( 171 | "SELECT * FROM '/nonexistent/file.parquet' LIMIT 1", 172 | files=['/nonexistent/file.parquet'] 173 | ) 174 | 175 | # Print debug info 176 | print("Nonexistent file query result:", result) 177 | 178 | # Check results 179 | self.assertFalse(result["success"], "Query should fail with nonexistent file") 180 | self.assertIn("error", result, "Should return an error message") 181 | 182 | def test_execute_sql_query_with_real_data(self): 183 | """Test executing SQL query with real blockchain data""" 184 | # Skip this test if we don't have real data 185 | if not self.has_real_data: 186 | self.skipTest("No real blockchain data available") 187 | 188 | # Find parquet files to use for testing 189 | parquet_files = list(self.data_dir.glob("*.parquet")) 190 | if not parquet_files: 191 | self.skipTest("No parquet files found for testing") 192 | 193 | # Get file paths as strings for the test 194 | file_paths = [str(f) for f in parquet_files] 195 | 196 | # Part 1: Test direct file reference 197 | result = execute_sql_query( 198 | f"SELECT * FROM '{file_paths[0]}' LIMIT 3", 199 | files=file_paths 200 | ) 201 | 202 | # Print some debug info to see what's happening 203 | print("Result:", result) 204 | 205 | # Check if we have an error 206 | if not result.get("success", False) and "error" in result: 207 | print("SQL error:", result["error"]) 208 | 
209 | # Inspect the parquet file to make sure it's valid 210 | for file in parquet_files: 211 | print(f"Parquet file details: {file}") 212 | print(f"File size: {file.stat().st_size} bytes") 213 | 214 | try: 215 | # Try to read the parquet file directly 216 | from cryo_mcp.sql import create_connection 217 | conn = create_connection() 218 | conn.execute(f"SELECT * FROM '{file_paths[0]}' LIMIT 1") 219 | print("Direct parquet read test succeeded") 220 | except Exception as e: 221 | print(f"Direct parquet read test failed: {e}") 222 | 223 | # Check results 224 | self.assertTrue(result.get("success", False), "Query should succeed") 225 | self.assertEqual(result["row_count"], 3, "Should return 3 rows") 226 | self.assertEqual(len(result["files_used"]), len(file_paths), "Should track all files") 227 | 228 | # Verify that we got real data with expected columns 229 | self.assertIn("schema", result, "Should include schema") 230 | self.assertIn("columns", result["schema"], "Should include columns in schema") 231 | 232 | # Verify we can run a more complex query directly on the file 233 | complex_result = execute_sql_query( 234 | f""" 235 | SELECT 236 | MIN(block_number) as min_block, 237 | MAX(block_number) as max_block, 238 | AVG(gas_used) as avg_gas 239 | FROM '{file_paths[0]}' 240 | """, 241 | files=file_paths 242 | ) 243 | 244 | print("Complex result:", complex_result) 245 | self.assertTrue(complex_result["success"], "Complex query should succeed") 246 | self.assertEqual(complex_result["row_count"], 1, "Should return 1 summary row") 247 | self.assertIn("min_block", complex_result["result"][0], "Should have min_block column") 248 | self.assertIn("max_block", complex_result["result"][0], "Should have max_block column") 249 | 250 | # Part 2: Test table name with multiple files (if we have more than one file) 251 | if len(parquet_files) > 1: 252 | # Create a duplicate file to ensure we have multiple files 253 | duplicate_file = self.data_dir / f"{parquet_files[0].stem}_copy.parquet" 254 | shutil.copy(parquet_files[0], duplicate_file) 255 | 256 | # Update file paths list to include the duplicate 257 | file_paths.append(str(duplicate_file)) 258 | 259 | # Extract dataset name from filename for table reference 260 | # Example: ethereum__blocks__00001000_to_00001004.parquet -> blocks 261 | dataset_name = None 262 | file_name = parquet_files[0].stem 263 | if "__" in file_name: 264 | parts = file_name.split("__") 265 | if len(parts) > 1: 266 | dataset_name = parts[1] # e.g., blocks, transactions 267 | 268 | if not dataset_name: 269 | # Fallback - just use a simple name 270 | dataset_name = "blocks" 271 | 272 | # Run a query using table name (should combine files) 273 | multi_file_result = execute_sql_query( 274 | f"SELECT COUNT(*) as total_rows FROM {dataset_name}", 275 | files=file_paths 276 | ) 277 | 278 | print(f"Multi-file result for table '{dataset_name}':", multi_file_result) 279 | 280 | # Check that our query was successful 281 | self.assertTrue(multi_file_result["success"], "Multi-file query should succeed") 282 | 283 | # Verify table mappings show multiple files were used 284 | self.assertIsNotNone(multi_file_result.get("table_mappings"), "Should include table mappings") 285 | self.assertTrue( 286 | any(mapping["combined"] for mapping in multi_file_result.get("table_mappings", {}).values()), 287 | "Should indicate files were combined" 288 | ) 289 | 290 | @patch('duckdb.DuckDBPyConnection') 291 | @patch('cryo_mcp.sql.duckdb.connect') 292 | def test_execute_sql_query_with_mock_data(self, mock_connect, 
mock_connection): 293 | """Test executing SQL query with mock data (fallback if real data unavailable)""" 294 | # Skip if we have real data (we'll use the real data test instead) 295 | if self.has_real_data: 296 | self.skipTest("Using real data test instead") 297 | 298 | # Create mock parquet file 299 | file_path = self.create_mock_parquet_file("blocks") 300 | 301 | # Setup mock connection and cursor 302 | mock_fetchdf = MagicMock() 303 | mock_fetchdf.to_dict.return_value = [{"block_number": 1000, "gas_used": 1000000}] 304 | mock_fetchdf.empty = False 305 | mock_fetchdf.columns = ["block_number", "gas_used"] 306 | mock_fetchdf.dtypes = {"block_number": "int64", "gas_used": "int64"} 307 | 308 | mock_cursor = MagicMock() 309 | mock_cursor.fetchdf.return_value = mock_fetchdf 310 | 311 | mock_connection_instance = mock_connect.return_value 312 | mock_connection_instance.execute.return_value = mock_cursor 313 | 314 | # Call execute_sql_query with direct file reference 315 | result = execute_sql_query( 316 | f"SELECT * FROM '{file_path}'", 317 | files=[str(file_path)] 318 | ) 319 | 320 | # Check connection setup 321 | mock_connect.assert_called_once() 322 | 323 | # Check results 324 | self.assertTrue(result["success"], "Query should succeed") 325 | self.assertEqual(result["row_count"], 1, "Should return correct row count") 326 | self.assertEqual(len(result["files_used"]), 1, "Should track files used") 327 | self.assertIn(str(file_path), result["files_used"][0], "Should include file path used") 328 | 329 | 330 | if __name__ == "__main__": 331 | unittest.main() 332 | -------------------------------------------------------------------------------- /tests/test_transaction_by_hash.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import sys 4 | from cryo_mcp.server import get_transaction_by_hash 5 | 6 | # Set ETH_RPC_URL 7 | os.environ["ETH_RPC_URL"] = "http://10.0.0.48:8545" 8 | print(f"Using ETH_RPC_URL: {os.environ['ETH_RPC_URL']}") 9 | 10 | def test_get_transaction_by_hash(): 11 | """Test getting transaction details by hash""" 12 | 13 | # Test with a known transaction hash from our previous tests 14 | # You can replace this with any valid transaction hash you want to test with 15 | tx_hash = "0xbee5a5c9024d9d6dde31c180c71b21aba1ebb7a726cf148a4b2781cf0ca7b7e6" 16 | 17 | print(f"Looking up transaction: {tx_hash}") 18 | tx_info = get_transaction_by_hash(tx_hash) 19 | 20 | if "error" in tx_info: 21 | print(f"❌ Error: {tx_info['error']}") 22 | return False 23 | 24 | # Print transaction details 25 | print("\nTransaction Details:") 26 | for key, value in tx_info.items(): 27 | # Skip printing the full input data which can be very long 28 | if key == "input" and value and len(value) > 100: 29 | print(f" {key}: {value[:50]}...{value[-50:]}") 30 | else: 31 | print(f" {key}: {value}") 32 | 33 | # Test with an invalid transaction hash 34 | invalid_hash = "0x1234567890123456789012345678901234567890123456789012345678901234" 35 | 36 | print(f"\nLooking up invalid transaction: {invalid_hash}") 37 | invalid_tx = get_transaction_by_hash(invalid_hash) 38 | 39 | if "error" in invalid_tx: 40 | print(f"✅ Expected error for invalid hash: {invalid_tx['error']}") 41 | else: 42 | print(f"❌ Unexpected success for invalid hash: {invalid_tx}") 43 | return False 44 | 45 | return True 46 | 47 | if __name__ == "__main__": 48 | test_get_transaction_by_hash() -------------------------------------------------------------------------------- 
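The `get_transaction_by_hash` helper exercised above is imported from `cryo_mcp.server`, whose implementation is not shown in this file. Presumably it wraps the standard `eth_getTransactionByHash` JSON-RPC call, roughly along these lines; the function name `fetch_transaction` and the error shape here are illustrative assumptions, not the actual server code.

```python
# Illustrative sketch only -- not the actual cryo_mcp.server implementation.
import os
import requests

def fetch_transaction(tx_hash: str) -> dict:
    """Look up a transaction via the standard eth_getTransactionByHash RPC method."""
    rpc_url = os.environ.get("ETH_RPC_URL", "http://localhost:8545")
    payload = {
        "jsonrpc": "2.0",
        "method": "eth_getTransactionByHash",
        "params": [tx_hash],
        "id": 1,
    }
    data = requests.post(rpc_url, json=payload).json()
    if data.get("result") is None:
        # Unknown hashes come back with result: null
        return {"error": f"Transaction not found: {tx_hash}"}
    return data["result"]
```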
/tests/test_transactions.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import os 4 | import subprocess 5 | from pathlib import Path 6 | 7 | # Set ETH_RPC_URL 8 | os.environ["ETH_RPC_URL"] = "http://10.0.0.48:8545" 9 | print(f"Using ETH_RPC_URL: {os.environ['ETH_RPC_URL']}") 10 | 11 | def test_transactions(): 12 | """Test fetching transactions for a specific block""" 13 | 14 | # Use a known block number 15 | block_num = 22001067 # You can replace this with any block number you want to test with 16 | block_range = f"{block_num}:{block_num+1}" 17 | 18 | # Create a temp directory for output 19 | temp_dir = Path("/tmp/cryo_tx_test") 20 | temp_dir.mkdir(exist_ok=True) 21 | 22 | cmd = ["cryo", "transactions", "-b", block_range, "-r", os.environ["ETH_RPC_URL"], "--json", "-o", str(temp_dir)] 23 | 24 | print(f"Running command: {' '.join(cmd)}") 25 | result = subprocess.run(cmd, capture_output=True, text=True) 26 | 27 | print(f"Return code: {result.returncode}") 28 | print(f"STDOUT: {result.stdout[:500]}...") 29 | 30 | # Find and read the output file 31 | output_files = list(temp_dir.glob("*transactions*.json")) 32 | print(f"Output files: {output_files}") 33 | 34 | if output_files: 35 | with open(output_files[0], 'r') as f: 36 | data = json.load(f) 37 | 38 | print(f"Number of transactions: {len(data)}") 39 | if data: 40 | print(f"First transaction hash: {data[0].get('transaction_hash')}") 41 | print(f"First transaction block number: {data[0].get('block_number')}") 42 | 43 | # Save the first transaction to a file for inspection 44 | print(f"Saving first transaction to ethereum__blocks_{block_num}_to_{block_num}.json") 45 | with open(f"ethereum__blocks_{block_num}_to_{block_num}.json", 'w') as outfile: 46 | json.dump(data, outfile, indent=2) 47 | 48 | return data 49 | 50 | return None 51 | 52 | if __name__ == "__main__": 53 | test_transactions() --------------------------------------------------------------------------------
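A quick usage sketch for `get_table_schema` from `cryo_mcp/sql.py`, pointed at the parquet fixture bundled under `tests/data/`; it assumes the repo root as the working directory.

```python
# Sketch: inspect the schema of the bundled test fixture.
from cryo_mcp.sql import get_table_schema

info = get_table_schema("tests/data/ethereum__blocks__00001000_to_00001004.parquet")
if info["success"]:
    # Column records come from DuckDB's information_schema.columns
    print([col["column_name"] for col in info["columns"]])
    print(f"{info['row_count']} rows")
else:
    print(info["error"])
```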