├── mcpb ├── requirements.txt ├── .gitignore ├── .dxtignore ├── icon.png ├── README.md ├── manifest.json └── server.py ├── kumo_rfm_mcp ├── _version.py ├── tools │ ├── __init__.py │ ├── auth.py │ ├── io.py │ ├── docs.py │ ├── model.py │ └── graph.py ├── __init__.py ├── session.py ├── resources │ ├── overview.md │ ├── explainability.md │ ├── graph-setup.md │ └── predictive-query.md ├── server.py └── config.py ├── .gitignore ├── test ├── tools │ ├── test_docs.py │ ├── test_io.py │ ├── test_model.py │ └── test_graph.py └── conftest.py ├── .github └── workflows │ ├── mcpb.yml │ ├── test.yml │ ├── lint.yml │ ├── _release.yml │ └── release.yml ├── LICENSE ├── .pre-commit-config.yaml ├── pyproject.toml └── README.md /mcpb/requirements.txt: -------------------------------------------------------------------------------- 1 | kumo-rfm-mcp==0.2.0 2 | -------------------------------------------------------------------------------- /mcpb/.gitignore: -------------------------------------------------------------------------------- 1 | *.dxt 2 | sys_venv/ 3 | uv_venv/ 4 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.3.0.dev0' 2 | -------------------------------------------------------------------------------- /mcpb/.dxtignore: -------------------------------------------------------------------------------- 1 | README.md 2 | sys_venv/ 3 | uv_venv/ 4 | -------------------------------------------------------------------------------- /mcpb/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kumo-ai/kumo-rfm-mcp/HEAD/mcpb/icon.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[oc] 3 | build/ 4 | dist/ 
5 | wheels/ 6 | *.egg-info 7 | .venv 8 | .env 9 | .mypy_cache/ 10 | tmp/ 11 | mcp.json 12 | server-info.json 13 | .DS_Store 14 | -------------------------------------------------------------------------------- /mcpb/README.md: -------------------------------------------------------------------------------- 1 | # MCPB Packaging 2 | 3 | 1. Install: 4 | ```bash 5 | npm install -g @anthropic-ai/dxt 6 | ``` 7 | 1. Create `dxt` file: 8 | ```bash 9 | dxt pack 10 | ``` 11 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from .docs import register_docs_tools 2 | from .auth import register_auth_tools 3 | from .io import register_io_tools 4 | from .graph import register_graph_tools 5 | from .model import register_model_tools 6 | 7 | __all__ = [ 8 | 'register_docs_tools', 9 | 'register_auth_tools', 10 | 'register_io_tools', 11 | 'register_graph_tools', 12 | 'register_model_tools', 13 | ] 14 | -------------------------------------------------------------------------------- /test/tools/test_docs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from fastmcp import Client 3 | 4 | from kumo_rfm_mcp.server import mcp 5 | 6 | 7 | @pytest.mark.asyncio 8 | async def test_get_docs() -> None: 9 | async with Client(mcp) as client: 10 | result = await client.call_tool('get_docs', { 11 | 'resource_uri': 'kumo://docs/overview', 12 | }) 13 | assert result.content[0].text.startswith('# Overview of KumoRFM\n') 14 | 15 | result = await client.call_tool( 16 | 'get_docs', { 17 | 'resource_uri': 'kumo://docs/graph-setup', 18 | }) 19 | assert result.content[0].text.startswith('# Graph Setup\n') 20 | 21 | result = await client.call_tool( 22 | 'get_docs', { 23 | 'resource_uri': 'kumo://docs/predictive-query', 24 | }) 25 | assert result.content[0].text.startswith('# Predictive Query\n') 26 | 
-------------------------------------------------------------------------------- /.github/workflows/mcpb.yml: -------------------------------------------------------------------------------- 1 | name: MCPBundle 2 | 3 | on: # yamllint disable-line rule:truthy 4 | push: 5 | branches: [main] 6 | pull_request: 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ startsWith(github.ref, 'refs/pull/') || github.run_number }} 10 | # Only cancel intermediate builds if on a PR: 11 | cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} 12 | 13 | jobs: 14 | mcpb: 15 | runs-on: ${{ matrix.os }} 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | os: [ubuntu-latest, windows-latest, macos-latest] 21 | 22 | steps: 23 | - name: Checkout repository 24 | uses: actions/checkout@v5 25 | 26 | - name: Set up Python 27 | uses: actions/setup-python@v5 28 | with: 29 | python-version: '3.10' 30 | 31 | - name: Run MCPB 32 | run: python mcpb/server.py 33 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import __version__ 2 | from .config import ( 3 | TableSource, 4 | TableSourcePreview, 5 | TableMetadata, 6 | AddTableMetadata, 7 | UpdateTableMetadata, 8 | LinkMetadata, 9 | GraphMetadata, 10 | UpdateGraphMetadata, 11 | UpdatedGraphMetadata, 12 | MaterializedGraphInfo, 13 | PredictResponse, 14 | EvaluateResponse, 15 | ExplanationResponse, 16 | ) 17 | from .session import Session, SessionManager 18 | 19 | __all__ = [ 20 | '__version__', 21 | 'TableSource', 22 | 'TableSourcePreview', 23 | 'TableMetadata', 24 | 'AddTableMetadata', 25 | 'UpdateTableMetadata', 26 | 'LinkMetadata', 27 | 'GraphMetadata', 28 | 'UpdateGraphMetadata', 29 | 'UpdatedGraphMetadata', 30 | 'MaterializedGraphInfo', 31 | 'PredictResponse', 32 | 'EvaluateResponse', 33 | 'ExplanationResponse', 
34 | 'Session', 35 | 'SessionManager', 36 | ] 37 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: # yamllint disable-line rule:truthy 4 | push: 5 | branches: [main] 6 | pull_request: 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ startsWith(github.ref, 'refs/pull/') || github.run_number }} 10 | # Only cancel intermediate builds if on a PR: 11 | cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} 12 | 13 | jobs: 14 | pytest: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v5 20 | 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: '3.10' 25 | 26 | - name: Install uv 27 | uses: astral-sh/setup-uv@v6 28 | with: 29 | python-version: '3.10' 30 | activate-environment: true 31 | 32 | - name: Install Kumo RFM MCP 33 | run: uv pip install '.[dev]' 34 | 35 | - name: Run test suite 36 | run: uv run --no-project pytest 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Kumo.ai, Inc. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: # yamllint disable-line rule:truthy 4 | push: 5 | branches: [main] 6 | pull_request: 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ startsWith(github.ref, 'refs/pull/') || github.run_number }} 10 | # Only cancel intermediate builds if on a PR: 11 | cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} 12 | 13 | jobs: 14 | mypy: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v5 20 | 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: '3.10' 25 | 26 | - name: Install uv 27 | uses: astral-sh/setup-uv@v6 28 | with: 29 | python-version: '3.10' 30 | activate-environment: true 31 | 32 | - name: Install Kumo RFM MCP 33 | run: uv pip install . 
34 | 35 | - name: Install dependencies 36 | run: uv pip install mypy 37 | 38 | - name: Check type hints 39 | run: uv run --no-project mypy 40 | -------------------------------------------------------------------------------- /test/tools/test_io.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from kumo_rfm_mcp.tools.io import find_table_files, inspect_table_files 6 | 7 | 8 | @pytest.mark.asyncio 9 | @pytest.mark.parametrize('recursive', [False, True]) 10 | async def test_find_table_files(root_dir: Path, recursive: bool) -> None: 11 | sources = await find_table_files(root_dir, recursive) 12 | assert len(sources) == 3 13 | filenames = {source.path.name for source in sources} 14 | assert filenames == {'USERS.csv', 'ORDERS.parquet', 'STORES.csv'} 15 | 16 | 17 | @pytest.mark.asyncio 18 | async def test_inspect_table_files(root_dir: Path) -> None: 19 | previews = await inspect_table_files( 20 | paths=[(root_dir / 'USERS.csv').as_posix()], 21 | num_rows=4, 22 | ) 23 | assert len(previews) == 1 24 | preview = previews[(root_dir / 'USERS.csv').as_posix()] 25 | assert preview.rows == [ 26 | { 27 | 'USER_ID': 0, 28 | 'AGE': 20.0, 29 | 'GENDER': 'male', 30 | }, 31 | { 32 | 'USER_ID': 1, 33 | 'AGE': 30.0, 34 | 'GENDER': 'female', 35 | }, 36 | { 37 | 'USER_ID': 2, 38 | 'AGE': 40.0, 39 | 'GENDER': 'female', 40 | }, 41 | { 42 | 'USER_ID': 3, 43 | 'AGE': None, 44 | 'GENDER': None, 45 | }, 46 | ] 47 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/tools/auth.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os 3 | from typing import Literal 4 | 5 | from fastmcp import FastMCP 6 | from fastmcp.exceptions import ToolError 7 | from kumoai.experimental import rfm 8 | 9 | from kumo_rfm_mcp import SessionManager 10 | 11 | 12 | async def authenticate( 13 | ) -> Literal["KumoRFM session 
successfully authenticated"]: 14 | """Authenticate the current KumoRFM session. 15 | 16 | Authentication is needed once before predicting or evaluating with the 17 | KumoRFM model. If the 'KUMO_API_KEY' environment variable is not set, 18 | initiates an OAuth2 authentication flow by opening a browser window for 19 | user login. Sets the 'KUMO_API_KEY' environment variable upon successful 20 | authentication. 21 | """ 22 | session = SessionManager.get_default_session() 23 | 24 | if session.is_initialized: 25 | raise ToolError("KumoRFM session is already authenticated") 26 | 27 | if os.getenv('KUMO_API_KEY') in {None, '', '${user_config.KUMO_API_KEY}'}: 28 | try: 29 | await asyncio.to_thread(rfm.authenticate) 30 | except Exception as e: 31 | raise ToolError( 32 | f"Failed to authenticate KumoRFM session: {e}") from e 33 | 34 | session.initialize() 35 | return "KumoRFM session successfully authenticated" 36 | 37 | 38 | def register_auth_tools(mcp: FastMCP) -> None: 39 | """Register all authentication tools to the MCP server.""" 40 | mcp.tool(annotations=dict( 41 | title="🔑 Signing in to KumoRFM…", 42 | readOnlyHint=False, 43 | destructiveHint=False, 44 | idempotentHint=False, 45 | openWorldHint=False, 46 | ))(authenticate) 47 | -------------------------------------------------------------------------------- /mcpb/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "dxt_version": "0.1", 3 | "name": "kumo-rfm-mcp", 4 | "display_name": "KumoRFM", 5 | "version": "0.1.0", 6 | "description": "KumoRFM MCP Server", 7 | "long_description": "KumoRFM is a pre-trained Relational Foundation Model (RFM) that generates training-free predictions on any relational multi-table data by interpreting the data as a (temporal) heterogeneous graph. 
It can be queried via the Predictive Query Language (PQL).", 8 | "author": { 9 | "name": "Kumo.AI", 10 | "email": "hello@kumo.ai" 11 | }, 12 | "repository": { 13 | "type": "git", 14 | "url": "https://github.com/kumo-ai/kumo-rfm-mcp" 15 | }, 16 | "homepage": "https://kumorfm.ai", 17 | "documentation": "https://kumo-ai.github.io/kumo-sdk/docs", 18 | "support": "https://github.com/kumo-ai/kumo-rfm-mcp/issues", 19 | "icon": "icon.png", 20 | "server": { 21 | "type": "python", 22 | "entry_point": "server.py", 23 | "mcp_config": { 24 | "command": "python", 25 | "args": ["${__dirname}/server.py"], 26 | "env": { 27 | "KUMO_API_KEY": "${user_config.KUMO_API_KEY}" 28 | }, 29 | "platform_overrides": { 30 | "darwin": { 31 | "command": "python3" 32 | } 33 | } 34 | } 35 | }, 36 | "keywords": [ 37 | "rfm", 38 | "mcp", 39 | "foundation model", 40 | "relational data", 41 | "kumo" 42 | ], 43 | "license": "MIT", 44 | "user_config": { 45 | "KUMO_API_KEY": { 46 | "type": "string", 47 | "title": "KumoRFM API key", 48 | "description": "You can generate your KumoRFM API key for free at https://kumorfm.ai. If not provided, you can authenticate on-the-fly in individual sessions.", 49 | "required": false, 50 | "sensitive": true 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /mcpb/server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import subprocess 4 | import sys 5 | from pathlib import Path 6 | 7 | ROOT = Path(__file__).parent 8 | SYS_VENV = ROOT / 'sys_venv' 9 | UV_VENV = ROOT / 'uv_venv' 10 | REQUIREMENTS = ROOT / 'requirements.txt' 11 | 12 | 13 | def get_venv() -> Path: 14 | """Create two virtual environments for KumoRFM MCP. 15 | 1. A venv that inherits system Python version and installs `uv`. 16 | 2. A `uv` venv with Python 3.11. 
17 | """ 18 | if os.name == 'nt': 19 | sys_python = SYS_VENV / 'Scripts' / 'python.exe' 20 | uv_python = UV_VENV / 'Scripts' / 'python.exe' 21 | else: 22 | sys_python = SYS_VENV / 'bin' / 'python' 23 | uv_python = UV_VENV / 'bin' / 'python' 24 | 25 | if not SYS_VENV.exists(): 26 | subprocess.check_call( 27 | [sys.executable, '-m', 'venv', SYS_VENV], 28 | stdout=subprocess.DEVNULL, 29 | ) 30 | 31 | if not UV_VENV.exists(): 32 | subprocess.check_call( 33 | [str(sys_python), '-m', 'pip', 'install', 'uv'], 34 | stdout=subprocess.DEVNULL, 35 | ) 36 | subprocess.check_call( 37 | [ 38 | str(sys_python), '-m', 'uv', 'venv', '--python', '3.11', 39 | str(UV_VENV) 40 | ], 41 | stdout=subprocess.DEVNULL, 42 | ) 43 | 44 | subprocess.check_call( 45 | [ 46 | str(sys_python), '-m', 'uv', 'pip', 'install', '-r', 47 | str(REQUIREMENTS), '--python', 48 | str(uv_python) 49 | ], 50 | stdout=subprocess.DEVNULL, 51 | ) 52 | 53 | return uv_python 54 | 55 | 56 | if __name__ == '__main__': 57 | python = get_venv() 58 | sys.exit(subprocess.call([str(python), '-m', 'kumo_rfm_mcp.server'])) 59 | -------------------------------------------------------------------------------- /test/tools/test_model.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from kumoapi.rfm import Explanation 3 | 4 | from kumo_rfm_mcp import UpdateGraphMetadata 5 | from kumo_rfm_mcp.tools.graph import materialize_graph, update_graph_metadata 6 | from kumo_rfm_mcp.tools.model import evaluate, explain, predict 7 | 8 | 9 | @pytest.mark.asyncio 10 | async def test_predict(graph: UpdateGraphMetadata) -> None: 11 | update_graph_metadata(graph) 12 | await materialize_graph() 13 | 14 | out = await predict( 15 | 'PREDICT USERS.AGE>20 FOR EACH USERS.USER_ID', 16 | indices=[0], 17 | anchor_time=None, 18 | run_mode='fast', 19 | num_neighbors=[16, 16], 20 | max_pq_iterations=20, 21 | ) 22 | assert len(out.predictions) == 1 23 | assert len(out.logs) == 0 24 | 25 | 26 | 
@pytest.mark.asyncio 27 | async def test_evaluate(graph: UpdateGraphMetadata) -> None: 28 | update_graph_metadata(graph) 29 | await materialize_graph() 30 | 31 | out = await evaluate( 32 | 'PREDICT USERS.AGE>20 FOR EACH USERS.USER_ID', 33 | metrics=None, 34 | anchor_time=None, 35 | run_mode='fast', 36 | num_neighbors=[16, 16], 37 | max_pq_iterations=20, 38 | ) 39 | assert set(out.metrics.keys()) == {'ap', 'auprc', 'auroc'} 40 | assert len(out.logs) == 0 41 | 42 | 43 | @pytest.mark.asyncio 44 | async def test_explain(graph: UpdateGraphMetadata) -> None: 45 | update_graph_metadata(graph) 46 | await materialize_graph() 47 | 48 | out = await explain( 49 | 'PREDICT USERS.AGE>20 FOR EACH USERS.USER_ID', 50 | index=0, 51 | anchor_time=None, 52 | num_neighbors=[16, 16], 53 | max_pq_iterations=20, 54 | ) 55 | assert isinstance(out.prediction, dict) 56 | assert isinstance(out.explanation, Explanation) 57 | assert len(out.logs) == 0 58 | -------------------------------------------------------------------------------- /.github/workflows/_release.yml: -------------------------------------------------------------------------------- 1 | name: Release on tag 2 | 3 | on: # yamllint disable-line rule:truthy 4 | push: 5 | tags: 6 | - 'v[0-9]+.[0-9]+.[0-9]+' # vX.Y.Z 7 | - 'v[0-9]+.[0-9]+.[0-9]+rc[0-9]+' # vX.Y.ZrcN 8 | 9 | jobs: 10 | release: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v5 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v5 17 | with: 18 | python-version: '3.10' 19 | 20 | - name: Build package 21 | run: | 22 | python -m pip install --upgrade pip build twine 23 | python -m build 24 | twine check dist/* 25 | 26 | - name: Create a GitHub release (pre-release) 27 | if: ${{ contains(github.ref_name, 'rc') }} 28 | run: | 29 | gh release create ${{ github.ref_name }} --verify-tag --generate-notes --title ${{ github.ref_name }} --prerelease 30 | env: 31 | GH_TOKEN: ${{ github.token }} 32 | 33 | - name: Create a GitHub release 34 | if: 
${{ !contains(github.ref_name, 'rc') }} 35 | run: | 36 | gh release create ${{ github.ref_name }} --verify-tag --generate-notes --title ${{ github.ref_name }} 37 | env: 38 | GH_TOKEN: ${{ github.token }} 39 | 40 | - name: Upload to TestPyPI 41 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 42 | with: 43 | repository-url: https://test.pypi.org/legacy/ 44 | user: __token__ 45 | password: ${{ secrets.TEST_PYPI_TOKEN }} 46 | verbose: true 47 | 48 | - name: Upload to PyPI 49 | uses: pypa/gh-action-pypi-publish@76f52bc884231f62b9a034ebfe128415bbaabdfc # v1.12.4 50 | with: 51 | repository-url: https://upload.pypi.org/legacy/ 52 | user: __token__ 53 | password: ${{ secrets.PYPI_TOKEN }} 54 | verbose: true 55 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/session.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dataclasses import dataclass, field 3 | 4 | from fastmcp.exceptions import ToolError 5 | from kumoai.experimental import rfm 6 | from typing_extensions import Self 7 | 8 | 9 | @dataclass(init=False, repr=False) 10 | class Session: 11 | name: str 12 | _graph: rfm.LocalGraph = field(default_factory=lambda: rfm.LocalGraph([])) 13 | _model: rfm.KumoRFM | None = None 14 | 15 | def __init__(self, name: str) -> None: 16 | self.name = name 17 | self._graph = rfm.LocalGraph([]) 18 | self._model = None 19 | 20 | @property 21 | def is_initialized(self) -> bool: 22 | from kumoai import global_state 23 | return global_state.initialized 24 | 25 | @property 26 | def graph(self) -> rfm.LocalGraph: 27 | return self._graph 28 | 29 | @property 30 | def model(self) -> rfm.KumoRFM: 31 | if self._model is None: 32 | raise ToolError("Graph is not yet materialized") 33 | self.initialize() 34 | return self._model 35 | 36 | def clear(self) -> Self: 37 | """Clear the current session.""" 38 | self._graph = rfm.LocalGraph([]) 39 | self._model = None 
40 | return self 41 | 42 | def initialize(self) -> Self: 43 | """Initialize a session from environment variables.""" 44 | if not self.is_initialized: 45 | if os.getenv('KUMO_API_KEY') is None: 46 | raise ToolError("Missing required environment variable " 47 | "'KUMO_API_KEY'. Please set your API key via " 48 | "`export KUMO_API_KEY='your-api-key'` or " 49 | "call the 'authenticate' tool to " 50 | "automatically generate an API key.") 51 | 52 | rfm.init() 53 | 54 | return self 55 | 56 | def __repr__(self) -> str: 57 | return f'{self.__class__.__name__}(name={self.name})' 58 | 59 | 60 | class SessionManager: 61 | _default: Session = Session(name='default') 62 | 63 | @classmethod 64 | def get_default_session(cls) -> Session: 65 | r"""Returns the default session.""" 66 | return cls._default 67 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | ci: 2 | autofix_prs: true 3 | autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions' 4 | autoupdate_schedule: monthly 5 | 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v6.0.0 9 | hooks: 10 | - id: no-commit-to-branch 11 | name: No commits to master 12 | - id: end-of-file-fixer 13 | name: End-of-file fixer 14 | - id: mixed-line-ending 15 | name: Fix mixed line endings 16 | args: [--fix, lf] 17 | - id: trailing-whitespace 18 | name: Remove trailing whitespaces 19 | - id: check-toml 20 | name: Check toml 21 | 22 | - repo: https://github.com/adrienverge/yamllint 23 | rev: v1.37.1 24 | hooks: 25 | - id: yamllint 26 | name: Lint yaml 27 | args: [-d, '{extends: default, rules: {line-length: disable, document-start: disable, truthy: {level: error}, braces: {max-spaces-inside: 1}}}'] 28 | 29 | - repo: https://github.com/asottile/pyupgrade 30 | rev: v3.21.2 31 | hooks: 32 | - id: pyupgrade 33 | name: Upgrade Python syntax 34 | args: [--py310-plus] 35 | 36 | 
- repo: https://github.com/PyCQA/autoflake 37 | rev: v2.3.1 38 | hooks: 39 | - id: autoflake 40 | name: Remove unused imports and variables 41 | args: [ 42 | --remove-all-unused-imports, 43 | --remove-unused-variables, 44 | --remove-duplicate-keys, 45 | --ignore-init-module-imports, 46 | --in-place, 47 | ] 48 | 49 | - repo: https://github.com/google/yapf 50 | rev: v0.43.0 51 | hooks: 52 | - id: yapf 53 | name: Format code 54 | additional_dependencies: [toml] 55 | 56 | - repo: https://github.com/pycqa/isort 57 | rev: 7.0.0 58 | hooks: 59 | - id: isort 60 | name: Sort imports 61 | 62 | - repo: https://github.com/PyCQA/flake8 63 | rev: 7.3.0 64 | hooks: 65 | - id: flake8 66 | name: Check PEP8 67 | 68 | - repo: https://github.com/astral-sh/ruff-pre-commit 69 | rev: v0.14.7 70 | hooks: 71 | - id: ruff 72 | name: Ruff formatting 73 | args: [--fix, --exit-non-zero-on-fix] 74 | 75 | - repo: https://github.com/executablebooks/mdformat 76 | rev: 0.7.22 77 | hooks: 78 | - id: mdformat 79 | name: Format Markdown 80 | additional_dependencies: 81 | - mdformat-gfm 82 | - mdformat_frontmatter 83 | - mdformat_footnote 84 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "kumo-rfm-mcp" 7 | dynamic = ["version"] 8 | description = "Model Context Protocol server for KumoRFM" 9 | readme = "README.md" 10 | license = { text = "MIT" } 11 | authors = [ 12 | { name = "Kumo.AI", email = "hello@kumo.ai" } 13 | ] 14 | keywords = [ 15 | "rfm", 16 | "mcp", 17 | "foundation model", 18 | "relational data", 19 | "kumo", 20 | ] 21 | requires-python = ">=3.10" 22 | dependencies = [ 23 | "kumoai==2.10.1", 24 | "fastmcp>=2.2.7,<3", 25 | ] 26 | classifiers = [ 27 | "License :: OSI Approved :: MIT License", 28 | "Development Status :: 3 - Alpha", 29 | "Environment 
:: Console", 30 | "Operating System :: OS Independent", 31 | "Intended Audience :: Developers", 32 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 33 | "Topic :: Scientific/Engineering :: Information Analysis", 34 | "Topic :: Software Development :: Libraries :: Python Modules", 35 | "Programming Language :: Python :: 3", 36 | "Programming Language :: Python :: 3.10", 37 | "Programming Language :: Python :: 3.11", 38 | "Programming Language :: Python :: 3.12", 39 | "Programming Language :: Python :: 3.13", 40 | ] 41 | 42 | [project.optional-dependencies] 43 | s3 = ["s3fs"] 44 | dev = [ 45 | "pytest", 46 | "pytest-asyncio", 47 | ] 48 | 49 | [project.scripts] 50 | kumo-rfm-mcp = "kumo_rfm_mcp.server:main" 51 | 52 | [tool.hatch.build] 53 | exclude = [ 54 | ".github", 55 | "mcpb", 56 | ".pre-commit-config.yaml", 57 | ] 58 | 59 | [tool.hatch.build.targets.wheel] 60 | packages = ["kumo_rfm_mcp"] 61 | 62 | [tool.hatch.version] 63 | path = "kumo_rfm_mcp/_version.py" 64 | 65 | [project.urls] 66 | Homepage = "https://kumorfm.ai" 67 | Repository = "https://github.com/kumo-ai/kumo-rfm-mcp.git" 68 | Issues = "https://github.com/kumo-ai/kumo-rfm-mcp/issues" 69 | 70 | [tool.yapf] 71 | based_on_style="pep8" 72 | split_before_named_assigns=false 73 | blank_line_before_nested_class_or_def=false 74 | 75 | [tool.isort] 76 | multi_line_output=3 77 | include_trailing_comma=true 78 | skip=[".gitignore", "__init__.py"] 79 | 80 | [tool.pytest.ini_options] 81 | addopts=[ 82 | "--capture=no", 83 | "--color=yes", 84 | "-vv", 85 | ] 86 | testpaths=["test"] 87 | 88 | [tool.mypy] 89 | files=["kumo_rfm_mcp", "test", "mcpb"] 90 | python_version="3.10" 91 | install_types=true 92 | non_interactive=true 93 | ignore_missing_imports=true 94 | show_error_codes=true 95 | warn_redundant_casts=true 96 | warn_unused_configs=true 97 | warn_unused_ignores=true 98 | disallow_untyped_defs=true 99 | disallow_incomplete_defs=true 100 | check_untyped_defs=true 101 | 
-------------------------------------------------------------------------------- /kumo_rfm_mcp/resources/overview.md: -------------------------------------------------------------------------------- 1 | # Overview of KumoRFM 2 | 3 | KumoRFM is a **pre-trained Relational Foundation Model (RFM)** that generates training-free predictions on any relational multi-table data by interpreting the data as a (temporal) heterogeneous graph. 4 | It can be queried via the **Predictive Query Language (PQL)**. 5 | 6 | **Internal note:** It is NOT related to "Recency, Frequency, Monetary" analysis. 7 | 8 | Internally, KumoRFM utilizes in-context learning to transfer patterns from historical examples to new unseen examples. 9 | Specifically, it constructs training/in-context subgraphs with known ground-truth labels and relates them to unseen subgraphs. 10 | 11 | KumoRFM can discover table-like files (e.g., CSV, Parquet), inspect them, and structure them into a graph via foreign key-primary key relationships. 12 | A time column in a table dictates the create time of a row, which is used downstream to receive and order historical interactions and prevent temporal leakage. 13 | Each column within a table is assigned a semantic type (`"numerical"`, `"categorical"`, `"multicategorical"`, `"ID"`, `"text"`, `"timestamp"`, `"sequence"`, etc) that denotes the semantic meaning of the column and how it should be processed within the model. 14 | 15 | See the `kumo://docs/graph-setup` resource for more information. 16 | 17 | After a graph is set up and materialized, KumoRFM can generate predictions (e.g., missing value imputation, temporal forecasts) and evaluations by querying the graph via the Predictive Query Language (PQL), a declarative language to formulate machine learning tasks. 18 | Understanding PQL and how it maps to a machine learning task is critical to achieve good model predictions. 
19 | Besides PQL, various other options exist to tune model output, e.g., optimizing the `run_mode` of the model, specifying how subgraphs are formed via `num_neighbors`, or adjusting the `anchor_time` to denote the point in time for when a prediction should be made. 20 | 21 | See the `kumo://docs/predictive-query` resource for more information. 22 | 23 | ## Getting Started 24 | 25 | 1. Finding, inspecting and understanding table-like files via `find_table_files` and `inspect_table_files` tools 26 | 1. Constructing and updating the graph schema via `update_graph_metadata` by adding/updating and removing tables and their schema, and inter-connecting them via foreign key-primary key relationships 27 | 1. Visualizing the graph schema as a Mermaid entity relationship diagram via `get_mermaid` 28 | 1. Materializing the graph via `materialize_graph` to make it available for inference operations; this step is necessary to efficiently form subgraphs around entities at any given point in time 29 | 1. 
Predicting and evaluating predictive queries on top of the materialized graph via `predict` and `evaluate` to obtain valuable insights for the future 30 | 31 | ## Quick Access 32 | 33 | Use the `get_docs` tool to access any resource: 34 | 35 | ``` 36 | get_docs('kumo://docs/graph-setup') 37 | get_docs('kumo://docs/predictive-query') 38 | ``` 39 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | import sys 4 | from pathlib import Path 5 | 6 | from fastmcp import FastMCP 7 | from fastmcp.resources import FileResource 8 | from pydantic import AnyUrl 9 | 10 | import kumo_rfm_mcp 11 | from kumo_rfm_mcp import tools 12 | 13 | logging.basicConfig( 14 | level=logging.INFO, 15 | format='[%(levelname)s] - %(asctime)s - %(name)s - %(message)s', 16 | stream=sys.stderr) 17 | logger = logging.getLogger('kumo-rfm-mcp') 18 | 19 | mcp = FastMCP( 20 | name='KumoRFM (Relational Foundation Model)', 21 | instructions=("KumoRFM is a pre-trained Relational Foundation Model (RFM) " 22 | "that generates training-free predictions on any relational " 23 | "multi-table data by interpreting the data as a (temporal) " 24 | "heterogeneous graph. 
It can be queried via the Predictive " 25 | "Query Language (PQL)."), 26 | version=kumo_rfm_mcp.__version__, 27 | ) 28 | 29 | # Tools ###################################################################### 30 | tools.register_docs_tools(mcp) 31 | tools.register_auth_tools(mcp) 32 | tools.register_io_tools(mcp) 33 | tools.register_graph_tools(mcp) 34 | tools.register_model_tools(mcp) 35 | 36 | # Resources ################################################################## 37 | mcp.add_resource( 38 | FileResource( 39 | uri=AnyUrl('kumo://docs/overview'), 40 | path=Path(__file__).parent / 'resources' / 'overview.md', 41 | name="Overview of KumoRFM", 42 | description="Overview of KumoRFM (Relational Foundation Model)", 43 | mime_type='text/markdown', 44 | tags={'documentation'}, 45 | )) 46 | mcp.add_resource( 47 | FileResource( 48 | uri=AnyUrl('kumo://docs/graph-setup'), 49 | path=Path(__file__).parent / 'resources' / 'graph-setup.md', 50 | name="Graph Setup", 51 | description="How to set up graphs in KumoRFM", 52 | mime_type='text/markdown', 53 | tags={'documentation'}, 54 | )) 55 | mcp.add_resource( 56 | FileResource( 57 | uri=AnyUrl('kumo://docs/predictive-query'), 58 | path=Path(__file__).parent / 'resources' / 'predictive-query.md', 59 | name="Predictive Query", 60 | description="How to query and generate predictions in KumoRFM", 61 | mime_type='text/markdown', 62 | tags={'documentation'}, 63 | )) 64 | mcp.add_resource( 65 | FileResource( 66 | uri=AnyUrl('kumo://docs/explainability'), 67 | path=Path(__file__).parent / 'resources' / 'explainability.md', 68 | name="Explainability", 69 | description="How to interpret and summarize explanations of KumoRFM", 70 | mime_type='text/markdown', 71 | tags={'documentation'}, 72 | )) 73 | 74 | 75 | def main() -> None: 76 | """Main entry point for the CLI command.""" 77 | try: 78 | mcp.run(transport='stdio') 79 | except KeyboardInterrupt: 80 | logger.info("Server shutdown requested by user") 81 | sys.exit(0) 82 | except Exception 
as e: 83 | logger.error(f"Failed to start KumoRFM MCP server: {e}") 84 | sys.exit(1) 85 | 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/tools/io.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import os.path as osp 3 | from pathlib import Path 4 | from typing import Annotated 5 | 6 | import pandas as pd 7 | from fastmcp import FastMCP 8 | from fastmcp.exceptions import ToolError 9 | from pydantic import Field 10 | 11 | from kumo_rfm_mcp import TableSource, TableSourcePreview 12 | 13 | 14 | async def find_table_files( 15 | path: Annotated[Path, "Local root directory to scan"], 16 | recursive: Annotated[ 17 | bool, 18 | Field( 19 | default=False, 20 | description=("Whether to scan subdirectories recursively. Use " 21 | "with caution in large directories such as home " 22 | "folders or system directories."), 23 | ), 24 | ], 25 | ) -> list[TableSource]: 26 | """Finds all table-like files (e.g., CSV, Parquet) in a directory. 27 | 28 | This tool is for local directories only. It cannot search, e.g., in S3 29 | buckets. 30 | """ 31 | path = path.expanduser() 32 | 33 | if not path.exists() or not path.is_dir(): 34 | raise ToolError(f"Directory '{path}' does not exist") 35 | 36 | def _find_table_files() -> list[TableSource]: 37 | pattern = "**/*" if recursive else "*" 38 | suffixes = {'.csv', '.parquet'} 39 | files = [f for f in path.glob(pattern) if f.suffix.lower() in suffixes] 40 | return [ 41 | TableSource(path=f, bytes=f.stat().st_size) for f in sorted(files) 42 | ] 43 | 44 | return await asyncio.to_thread(_find_table_files) 45 | 46 | 47 | async def inspect_table_files( 48 | paths: Annotated[ 49 | list[str], 50 | ("File paths to inspect. 
Can be a mix of local file paths, S3 URIs " 51 | "(s3://...), or HTTP/HTTPS URLs."), 52 | ], 53 | num_rows: Annotated[ 54 | int, 55 | Field( 56 | default=20, 57 | ge=1, 58 | le=1000, 59 | description="Number of rows to read per file", 60 | ), 61 | ], 62 | ) -> dict[str, TableSourcePreview]: 63 | """Inspect the first rows of table-like files. 64 | 65 | Each row in a file is represented as a dictionary mapping column 66 | names to their corresponding values. 67 | """ 68 | def read_file(path: str) -> TableSourcePreview: 69 | path = osp.expanduser(path) 70 | suffix = path.rsplit('.', maxsplit=1)[-1].lower() 71 | 72 | if suffix not in {'csv', 'parquet'}: 73 | raise ToolError(f"'{path}' is not a valid CSV or Parquet file") 74 | 75 | try: 76 | if suffix == 'csv': 77 | df = pd.read_csv(path, nrows=num_rows) 78 | else: 79 | assert suffix == 'parquet' 80 | # TODO Read first row groups via `pyarrow` instead. 81 | df = pd.read_parquet(path).head(num_rows) 82 | except Exception as e: 83 | raise ToolError(f"Could not read file '{path}': {e}") from e 84 | 85 | df = df.astype(object).where(df.notna(), None) 86 | return TableSourcePreview(rows=df.to_dict(orient='records')) 87 | 88 | tasks = [asyncio.to_thread(read_file, path) for path in paths] 89 | previews = await asyncio.gather(*tasks) 90 | return {path: preview for path, preview in zip(paths, previews)} 91 | 92 | 93 | def register_io_tools(mcp: FastMCP) -> None: 94 | """Register all I/O tools to the MCP server.""" 95 | mcp.tool(annotations=dict( 96 | title="🔍 Searching for tabular files…", 97 | readOnlyHint=True, 98 | destructiveHint=False, 99 | idempotentHint=True, 100 | openWorldHint=False, 101 | ))(find_table_files) 102 | 103 | mcp.tool(annotations=dict( 104 | title="🧐 Analyzing table structure…", 105 | readOnlyHint=True, 106 | destructiveHint=False, 107 | idempotentHint=True, 108 | openWorldHint=False, 109 | ))(inspect_table_files) 110 | -------------------------------------------------------------------------------- 
/test/conftest.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Any 3 | 4 | import pandas as pd 5 | import pytest 6 | from kumoai.experimental import rfm 7 | from kumoai.experimental.rfm.rfm import Explanation 8 | from kumoapi.rfm import Explanation as ExplanationConfig 9 | from kumoapi.task import TaskType 10 | from pytest import TempPathFactory 11 | 12 | from kumo_rfm_mcp import ( 13 | AddTableMetadata, 14 | LinkMetadata, 15 | SessionManager, 16 | UpdateGraphMetadata, 17 | ) 18 | 19 | 20 | @pytest.fixture(autouse=True) 21 | def clear_session() -> None: 22 | SessionManager.get_default_session().clear() 23 | 24 | 25 | @pytest.fixture(autouse=True) 26 | def mock_model(monkeypatch: pytest.MonkeyPatch) -> None: 27 | monkeypatch.setenv('KUMO_API_KEY', 'DUMMY') 28 | monkeypatch.setattr(rfm, 'init', lambda *args, **kwargs: None) 29 | 30 | def predict( 31 | *args: Any, 32 | explain: bool = False, 33 | **kwargs: Any, 34 | ) -> pd.DataFrame | Explanation: 35 | 36 | df = pd.DataFrame({ 37 | 'ENTITY': [0], 38 | 'ANCHOR_TIMESTAMP': ['2025-01-1'], 39 | 'TARGET_PRED': [True], 40 | 'False_PROB': [0.4], 41 | 'True_PROB': [0.6], 42 | }) 43 | 44 | if not explain: 45 | return df 46 | 47 | return Explanation( 48 | prediction=df, 49 | summary='', 50 | details=ExplanationConfig( 51 | task_type=TaskType.BINARY_CLASSIFICATION, 52 | cohorts=[], 53 | subgraphs=[], 54 | ), 55 | ) 56 | 57 | def evaluate(*args: Any, **kwargs: Any) -> pd.DataFrame: 58 | return pd.DataFrame({ 59 | 'metric': ['ap', 'auprc', 'auroc'], 60 | 'value': [0.8, 0.8, 0.9], 61 | }) 62 | 63 | monkeypatch.setattr(rfm.KumoRFM, 'predict', predict) 64 | monkeypatch.setattr(rfm.KumoRFM, 'evaluate', evaluate) 65 | 66 | 67 | @pytest.fixture(scope='session') 68 | def root_dir(tmp_path_factory: TempPathFactory) -> Path: 69 | path = tmp_path_factory.mktemp('table_files') 70 | 71 | df = pd.DataFrame({ 72 | 'USER_ID': [0, 1, 2, 3], 73 | 'AGE': [20, 
30, 40, float('NaN')], 74 | 'GENDER': ['male', 'female', 'female', None], 75 | }) 76 | df.to_csv(path / 'USERS.csv', index=False) 77 | 78 | df = pd.DataFrame({ 79 | 'USER_ID': [0, 1, 2, 3], 80 | 'STORE_ID': [0, 1, 0, 1], 81 | 'AMOUNT': [10, 15, float('NaN'), 20], 82 | 'TIME': ['2025-01-01', '2025-01-02', '2025-01-03', '2025-01-04'], 83 | }) 84 | df.to_parquet(path / 'ORDERS.parquet') 85 | 86 | df = pd.DataFrame({ 87 | 'STORE_ID': [0, 1], 88 | 'CAT': ['burger', 'pizza'], 89 | }) 90 | df.to_csv(path / 'STORES.csv', index=False) 91 | 92 | return path 93 | 94 | 95 | @pytest.fixture 96 | def graph(root_dir: Path) -> UpdateGraphMetadata: 97 | return UpdateGraphMetadata( # type: ignore[call-arg] 98 | tables_to_add=[ 99 | AddTableMetadata( 100 | path=(root_dir / 'USERS.csv').as_posix(), 101 | name='USERS', 102 | primary_key='USER_ID', 103 | time_column=None, 104 | end_time_column=None, 105 | ), 106 | AddTableMetadata( 107 | path=(root_dir / 'ORDERS.parquet').as_posix(), 108 | name='ORDERS', 109 | primary_key=None, 110 | time_column='TIME', 111 | end_time_column=None, 112 | ), 113 | AddTableMetadata( 114 | path=(root_dir / 'STORES.csv').as_posix(), 115 | name='STORES', 116 | primary_key='STORE_ID', 117 | time_column=None, 118 | end_time_column=None, 119 | ), 120 | ], 121 | links_to_add=[ 122 | LinkMetadata( 123 | source_table='ORDERS', 124 | foreign_key='USER_ID', 125 | destination_table='USERS', 126 | ), 127 | LinkMetadata( 128 | source_table='ORDERS', 129 | foreign_key='STORE_ID', 130 | destination_table='STORES', 131 | ), 132 | ], 133 | ) 134 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/tools/docs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Literal 3 | 4 | from fastmcp import FastMCP 5 | from fastmcp.exceptions import ToolError 6 | from fastmcp.resources import FileResource 7 | 8 | 9 | def register_docs_tools(mcp: FastMCP) -> 
None: 10 | """Register all documentation tools to the MCP server.""" 11 | @mcp.tool(annotations=dict( 12 | title="📖 Reading…", 13 | readOnlyHint=True, 14 | destructiveHint=False, 15 | idempotentHint=False, 16 | openWorldHint=False, 17 | )) 18 | async def get_docs( 19 | resource_uri: Literal[ 20 | 'kumo://docs/overview', 21 | 'kumo://docs/graph-setup', 22 | 'kumo://docs/predictive-query', 23 | 'kumo://docs/explainability', 24 | ] 25 | ) -> str: 26 | """Get documentation on how to use KumoRFM. 27 | 28 | KumoRFM is a pre-trained Relational Foundation Model (RFM) that 29 | generates training-free predictions on any relational multi-table 30 | data by interpreting the data as a (temporal) heterogeneous graph. It 31 | can be queried via the Predictive Query Language (PQL). 32 | 33 | Internal note: It is NOT related to "Recency, Frequency, Monetary" 34 | analysis. 35 | 36 | Internally, KumoRFM utilizes in-context learning to transfer patterns 37 | from historical examples to new unseen examples. Specifically, it 38 | constructs training/in-context subgraphs with known ground-truth 39 | labels and relates them to unseen subgraphs. 40 | 41 | See the 'kumo://docs/overview' resource for more information. 42 | 43 | KumoRFM can discover table-like files (e.g., CSV, Parquet), inspect 44 | them, and structure them into a graph via foreign key-primary key 45 | relationships. A time column in a table dictates the create time of a 46 | row, which is used downstream to receive and order historical 47 | interactions and prevent temporal leakage. Each column within a table 48 | is assigned a semantic type (numerical, categorical, multi-categorical, 49 | ID, text, timestamp, sequence, etc) that denotes the semantic meaning 50 | of the column and how it should be processed within the model. 51 | 52 | Important: Before creating and updating graphs, read the 53 | documentation first at 'kumo://docs/graph-setup'. 
54 | 55 | After a graph is set up and materialized, KumoRFM can generate 56 | predictions (e.g., missing value imputation, temporal forecasts) and 57 | evaluations by querying the graph via the Predictive Query Language 58 | (PQL), a declarative language to formulate machine learning tasks. 59 | Understanding PQL and how it maps to a machine learning task is 60 | critical to achieve good model predictions. Besides PQL, various other 61 | options exist to tune model output, e.g., optimizing the `run_mode` of 62 | the model, specifying how subgraphs are formed via `num_neighbors`, or 63 | adjusting the `anchor_time` to denote the point in time for when a 64 | prediction should be made. 65 | 66 | Important: Before executing or suggesting any predictive queries, 67 | read the documentation first at 'kumo://docs/predictive-query'. 68 | 69 | KumoRFM can additionally generate explanations for predictions, 70 | providing both a global column-level analysis and a local, cell-level 71 | attribution view. 72 | Together, these views enable comprehensive interpretation. 73 | 74 | Important: Before analyzing the explanation output, read the 75 | documentation first at 'kumo://docs/explainability'. 76 | """ 77 | resources = await mcp.get_resources() 78 | if resource_uri not in resources: 79 | raise ToolError(f"Resource '{resource_uri}' not found. 
Available " 80 | f"resources: {list(resources.keys())}") 81 | 82 | resource = resources[resource_uri] 83 | 84 | if isinstance(resource, FileResource): 85 | if getattr(resource, 'path', None): 86 | path = Path(resource.path) 87 | else: # Construct path from URI: 88 | name = f"{str(resource.uri).rsplit('/', 1)[-1]}.md" 89 | path = Path(__file__).parent.parent / 'resources' / name 90 | 91 | if not path.exists(): 92 | raise ToolError(f"File resource '{resource_uri}' not found at " 93 | "'{path}'") 94 | 95 | return path.read_text(encoding='utf-8') 96 | 97 | raise ToolError(f"Resource '{resource_uri}' is not accessible") 98 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/resources/explainability.md: -------------------------------------------------------------------------------- 1 | # Explainability 2 | 3 | KumoRFM explanations provide two complementary views of model predictions: 4 | 5 | 1. **Global View (Cohorts):** Column-level patterns across in-context examples that reveal what data characteristics drive predictions 6 | 1. **Local View (Subgraph):** Cell-level attribution scores showing which specific values in this entity's subgraph influenced the prediction 7 | 8 | Together, these views answer: "What patterns does the model see globally?" and "Which specific data points matter for this prediction?" 9 | 10 | ## Understanding the Global View: Cohorts 11 | 12 | Cohorts reveal how different value ranges or categories in columns correlate with prediction outcomes across all in-context examples. 13 | 14 | - `table_name`: Which table this analysis covers 15 | - `column_name`: Which column or statistic (e.g., `COUNT(*)`) this analysis covers 16 | - `hop`: Distance from the entity table (0 = entity attributes, 1 = direct neighbors, 2 = second-degree neighbors, ...) 
17 | - `stype`: Semantic type (numerical, categorical, timestamp, etc) 18 | - `cohorts`: List of value ranges/categories (e.g., `["[0-5]", "(5-10]", "(10-20+]"]`) 19 | - `populations`: Proportion of in-context examples in each cohort 20 | - `targets`: Average prediction score within each cohort 21 | 22 | High-impact columns usually have large variance in `targets` across different cohorts. 23 | 24 | **Example for a churn predictive query:** 25 | 26 | ``` 27 | table_name: "orders" 28 | column_name: "COUNT(*)" 29 | hop: 1 30 | cohorts: ["[0-0]", "(0-1]", "(1-2]", "(2-4]", "(4-6+]"] 31 | populations: [0.20, 0.08, 0.07, 0.11, 0.54] 32 | targets: [0.0, 0.78, 0.74, 0.64, 0.35] 33 | ``` 34 | 35 | **What this means:** 36 | 37 | - Users with 0 orders have 0% churn risk (they already churned) 38 | - Users with 1-2 orders have ~75% churn risk (early stage, not sticky) 39 | - Users with 6+ orders have 35% churn risk (established, but not immune) 40 | - Key insight: Order count is strongly predictive; more orders = lower churn 41 | 42 | ## Understanding the Local View: Subgraph 43 | 44 | Subgraphs show the actual data neighborhood around the specific entity being predicted, with attribution scores indicating importance. 45 | Node indices are different from primary keys and are mapped to a contiguous range from 0 to N. 46 | The entity being predicted is guaranteed to have ID 0. 47 | Some cells may have a `null` value with non-zero scores, indicating missingness itself is informative. 48 | 49 | Each node represents a row from a table, containing: 50 | 51 | - `cells`: Dictionary of column values with attribution scores 52 | - `value`: Actual data value 53 | - `score`: Gradient-based importance between 0 and 1 (higher = more influential) 54 | - `links`: Connections to other nodes via foreign keys 55 | 56 | Scores reflect how much changing this value would change the prediction. 57 | High scores on specific cells explain "why this prediction, not another". 
58 | 59 | **Score Magnitude Interpretation:** 60 | 61 | - 0.00 - 0.05: Negligible influence 62 | - 0.05 - 0.15: Moderate influence 63 | - 0.15 - 0.30: Strong influence 64 | - 0.30+: Critical influence 65 | 66 | **Example:** 67 | 68 | ``` 69 | cells: { 70 | "club_member_status": {value: "ACTIVE", score: 1.0}, 71 | "age": {value: 49, score: 0.089}, 72 | "fashion_news_frequency": {value: "Regularly", score: 0.411} 73 | } 74 | links: { 75 | "user_id->orders": [1,2,3,...,32] 76 | } 77 | ``` 78 | 79 | **What this means:** 80 | 81 | - Club membership status is the most important attribute (score=1.0). 82 | - Fashion news subscription is also highly influential (score=0.411). 83 | - Age contributes but is less critical (score=0.089). 84 | - User has 32 orders linked (indicates high activity). 85 | 86 | You can follow paths in the subgraph to understand data connectivity and how tables/cells far away may contribute to the prediction. 87 | 88 | ## Connecting Global and Local Views 89 | 90 | Oftentimes, you can understand high subgraph attribution scores by relating their cell values to the average prediction of the cohort. 91 | 92 | 1. **Find influential cells for the prediction in the local view:** 93 | Which cells have scores > 0.15? 94 | 1. **Locate entity in global context:** 95 | Find which cohorts the specific entity falls into and compare entity's values to high/low risk cohorts. 96 | Focus on highest-scoring cells and most divergent cohorts. 97 | 1. **Relate attribution score and cohort prediction:** 98 | Check if entity exhibits typical or atypical patterns. 99 | 1. **Find general global trends** in the data that might explain the prediction. 100 | Additionally, look for missing expected signals (why ISN'T something important?) 101 | 102 | Tell a coherent story connecting global patterns to local evidence. 103 | Use concrete numbers from the subgraph. 104 | Avoid jargon; explain in business terms.
105 | 106 | ## Common Interpretation Pitfalls 107 | 108 | - **Don't assume correlation = causation:** 109 | High scores show model importance, not real-world causality. 110 | For example, "black clothing" might correlate with churn, but color isn't the cause. 111 | - **Consider data distribution:** 112 | Rare cohorts may show extreme `targets` with small `populations`. 113 | Focus on cohorts with both significant population AND divergent targets. 114 | - **Missing cohort analysis:** 115 | Not all columns have a cohort analysis since some semantic types are unsupported. 116 | For example, text and ID columns typically only appear in local view. 117 | -------------------------------------------------------------------------------- /test/tools/test_graph.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from kumoapi.typing import Stype 5 | 6 | from kumo_rfm_mcp import ( 7 | AddTableMetadata, 8 | LinkMetadata, 9 | UpdateGraphMetadata, 10 | UpdateTableMetadata, 11 | ) 12 | from kumo_rfm_mcp.tools.graph import ( 13 | get_mermaid, 14 | inspect_graph_metadata, 15 | lookup_table_rows, 16 | materialize_graph, 17 | update_graph_metadata, 18 | ) 19 | 20 | 21 | def test_graph_metadata(root_dir: Path) -> None: 22 | graph = inspect_graph_metadata() 23 | assert len(graph.tables) == 0 24 | assert len(graph.links) == 0 25 | 26 | update = UpdateGraphMetadata( # type: ignore[call-arg] 27 | tables_to_add=[ 28 | AddTableMetadata( 29 | path=(root_dir / 'USERS.csv').as_posix(), 30 | name='USERS', 31 | primary_key='USER_ID', 32 | time_column=None, 33 | end_time_column=None, 34 | ), 35 | AddTableMetadata( 36 | path=(root_dir / 'ORDERS.parquet').as_posix(), 37 | name='ORDERS', 38 | primary_key=None, 39 | time_column='TIME', 40 | end_time_column=None, 41 | ), 42 | AddTableMetadata( 43 | path=(root_dir / 'STORES.csv').as_posix(), 44 | name='STORES', 45 | primary_key='STORE_ID', 46 | time_column=None, 47 | 
end_time_column=None, 48 | ), 49 | ]) 50 | update_graph_metadata(update) 51 | out = update_graph_metadata(update) # idempotent 52 | assert len(out.graph.tables) == 3 53 | assert len(out.graph.links) == 0 54 | assert len(out.errors) == 0 55 | 56 | update = UpdateGraphMetadata( # type: ignore[call-arg] 57 | tables_to_update={ 58 | 'USERS': UpdateTableMetadata( # type: ignore[call-arg] 59 | stypes={ 60 | 'AGE': Stype.categorical, 61 | 'GENDER': None, 62 | }, 63 | ) 64 | } 65 | ) 66 | update_graph_metadata(update) 67 | out = update_graph_metadata(update) # idempotent 68 | assert len(out.graph.tables) == 3 69 | assert len(out.graph.links) == 0 70 | assert len(out.errors) == 0 71 | assert out.graph.tables[0].stypes['AGE'] == Stype.categorical 72 | assert out.graph.tables[0].stypes['GENDER'] is None 73 | 74 | update = UpdateGraphMetadata( # type: ignore[call-arg] 75 | links_to_add=[ 76 | LinkMetadata( 77 | source_table='ORDERS', 78 | foreign_key='USER_ID', 79 | destination_table='USERS', 80 | ), 81 | LinkMetadata( 82 | source_table='ORDERS', 83 | foreign_key='STORE_ID', 84 | destination_table='STORES', 85 | ), 86 | ]) 87 | update_graph_metadata(update) 88 | out = update_graph_metadata(update) # idempotent 89 | assert len(out.graph.tables) == 3 90 | assert len(out.graph.links) == 2 91 | assert len(out.errors) == 0 92 | 93 | update = UpdateGraphMetadata( # type: ignore[call-arg] 94 | links_to_remove=[ 95 | LinkMetadata( 96 | source_table='ORDERS', 97 | foreign_key='USER_ID', 98 | destination_table='USERS', 99 | ), 100 | ], 101 | tables_to_remove=['STORES'], 102 | ) 103 | update_graph_metadata(update) 104 | out = update_graph_metadata(update) # idempotent 105 | assert len(out.graph.tables) == 2 106 | assert len(out.graph.links) == 0 107 | assert len(out.errors) == 0 108 | 109 | 110 | def test_get_mermaid(graph: UpdateGraphMetadata) -> None: 111 | update_graph_metadata(graph) 112 | mermaid = get_mermaid(show_columns=False) 113 | assert mermaid == ('erDiagram\n' 114 | ' USERS 
{\n' 115 | ' ID USER_ID PK\n' 116 | ' }\n' 117 | ' ORDERS {\n' 118 | ' ID USER_ID FK\n' 119 | ' ID STORE_ID FK\n' 120 | ' timestamp TIME\n' 121 | ' }\n' 122 | ' STORES {\n' 123 | ' ID STORE_ID PK\n' 124 | ' }\n' 125 | '\n' 126 | ' USERS o|--o{ ORDERS : USER_ID\n' 127 | ' STORES o|--o{ ORDERS : STORE_ID') 128 | 129 | 130 | @pytest.mark.asyncio 131 | async def test_materialize_graph(graph: UpdateGraphMetadata) -> None: 132 | update_graph_metadata(graph) 133 | await materialize_graph() 134 | out = await materialize_graph() # idempotent 135 | assert out.num_nodes == 10 136 | assert out.num_edges == 16 137 | assert out.time_ranges == { 138 | 'ORDERS': '2025-01-01 00:00:00 - 2025-01-04 00:00:00' 139 | } 140 | 141 | 142 | @pytest.mark.asyncio 143 | async def test_lookup_table_rows(graph: UpdateGraphMetadata) -> None: 144 | update_graph_metadata(graph) 145 | await materialize_graph() 146 | preview = await lookup_table_rows('USERS', ids=[1, 0]) 147 | assert preview.rows == [ 148 | { 149 | 'USER_ID': 1, 150 | 'AGE': 30.0, 151 | 'GENDER': 'female', 152 | }, 153 | { 154 | 'USER_ID': 0, 155 | 'AGE': 20.0, 156 | 'GENDER': 'male', 157 | }, 158 | ] 159 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/resources/graph-setup.md: -------------------------------------------------------------------------------- 1 | # Graph Setup 2 | 3 | This guide outlines the data requirements and best practices for setting up graphs from relational data in KumoRFM. 4 | 5 | KumoRFM operates on relational data organized as inter-connected tables forming a graph structure. The foundation of this process starts with a set of CSV or Parquet files, which are registered as table schemas and assembled into a graph schema. 
6 | 7 | ## Table Schema 8 | 9 | A table schema is defined by four concepts: 10 | 11 | - **Semantic types (`stypes`):** Semantic types denote the semantic meaning of columns in a table and how they should be processed within the model 12 | - **Primary key (`primary_key`):** A unique identifier for the table 13 | - **Time column (`time_column`):** The column that denotes the create time of rows (marking when the row became active) 14 | - **End time column (`end_time_column`):** The column that denotes the end time of rows (marking when the row stopped being active) 15 | 16 | ### Semantic Types 17 | 18 | The semantic type of a column will determine how it will be encoded downstream. 19 | Correctly setting each column's semantic type is critical for model performance. 20 | For instance, for missing value imputation queries, the semantic type determines whether the task is treated as regression (`stype="numerical"`) or as classification (`stype="categorical"`). 21 | 22 | The following semantic types are available: 23 | 24 | | `stype` | Explanation | Supported data types | Example | 25 | | -------------------- | ------------------------------------------------- | ---------------------------------------------- | ------------------------------------------------------------------------- | 26 | | `"numerical"` | Numerical values (e.g., `price`, `age`) | `int`, `float` | `25`, `3.14`, `-10` | 27 | | `"categorical"` | Discrete categories with limited cardinality | `int`, `float`, `string` | Color: `"red"`, `"blue"`, `"green"` (one cell may only have one category) | 28 | | `"multicategorical"` | Multiple categories in a single cell | `string`, `stringlist`, `intlist`, `floatlist` | `"Action\|Drama\|Comedy"`, `["Action", "Drama", "Comedy"]` | 29 | | `"ID"` | An identifier, e.g., primary keys or foreign keys | `int`, `float`, `string` | `123`, `PRD-8729453` | 30 | | `"text"` | Natural language text | `string` | Descriptions of products | 31 | | `"timestamp"` | Specific point in
time | `date`, `string` | `"2025-07-11"`, `"2023-02-12 09:47:58"` | 32 | | `"sequence"` | Custom embeddings or sequential data | `floatlist`, `intlist` | `[0.25, -0.75, 0.50, ...]` | 33 | 34 | Upon table registration, semantic types of columns are estimated based on simple heuristics (e.g., data types, cardinality), but may not be ideal. 35 | For example, low cardinality columns may be mistakenly treated as `"categorical"` rather than `"numerical"`. 36 | You can use your world knowledge and common sense to analyze and correct such mistakes. 37 | 38 | If certain columns should be discarded, e.g., because their cardinality is so high that proper model generalization becomes infeasible, a semantic type of `None` can be used to discard the column from being encoded. 39 | 40 | ### Primary Key 41 | 42 | The primary key is a unique identifier of each row in a table. 43 | Each table can have at most one primary key. 44 | If there are duplicated primary keys, the system will only keep the first one. 45 | A primary key can be used later to link tables through foreign key-primary key relationships. 46 | However, a primary key does not need to necessarily link to other tables. 47 | Setting a primary key will automatically assign the semantic type `"ID"` to this column. 48 | A primary key may not exist for all tables, but will be required whenever tables need to be linked together or whenever the table is used as the entity in a predictive query. 49 | 50 | ### Time Column 51 | 52 | A time column specifies the timestamp at which an event occurred or when this row became active. 53 | It is used to prevent temporal leakage during subgraph sampling, i.e. for a given anchor time only events are preserved with timestamp less than or equal to the given anchor time. 54 | Time column data must follow a datetime format that can be correctly parsed by `pandas.to_datetime`. 55 | Each table can have at most one time column.
56 | A time column may not exist for all tables, but will be required when predicting future aggregates over fact tables, e.g., the count of all orders in the next seven days. 57 | The system will only keep rows with non-N/A timestamps. 58 | If a table contains multiple candidate time columns, pick the one that most likely refers to the create time of the event. 59 | For example, `create_time` should be preferred over `update_time`. 60 | 61 | ### End Time Column 62 | 63 | An end time column specifies the timestamp at which an event or row stopped being active. 64 | It is used to exclude in-context examples that have already expired relative to a given anchor time. 65 | End time column data must follow a datetime format that can be correctly parsed by `pandas.to_datetime`. 66 | Each table can have at most one end time column. 67 | If both a time column and an end time column are present, they must refer to different columns in the dataset. 68 | An end time column is optional and typically only appears alongside a time column. 69 | 70 | ## Graph Schema 71 | 72 | Links between tables are defined via foreign key-primary key relationships, describing many-to-one relations. 73 | Links are the crucial bit that transforms individual tables into a connected relational structure, enabling KumoRFM to understand and leverage relationships in your data. 74 | However, it is also possible to use KumoRFM in single table settings or within multiple disjoint graph schemas registered within the same graph. 75 | 76 | A link is defined by a source table (`source_table`), the foreign key column in the source table (`foreign_key`), and a destination table (`destination_table`) holding a primary key. 77 | For example, the `orders` source table may hold a foreign key `user_id` to link to the destination table `users`, holding a unique identifier for each user.
78 | Oftentimes, links can be naturally inferred by inspecting the table schemas and relying on name matching to find meaningful connections. 79 | However, this may not always be the case. 80 | You can use your world knowledge and common sense to analyze meaningful connections between tables. 81 | 82 | Note that KumoRFM only supports foreign key-primary key links. 83 | In order to connect primary keys to primary keys, you have to remove the primary key in one of the tables. 84 | 85 | **Important:** Make sure that tables are correctly linked before proceeding. 86 | 87 | ## Graph Updates 88 | 89 | You can use the `update_graph_metadata` tool to perform partial graph schema updates by registering new tables, changing their semantic types, and linking them together. 90 | Note that all operations can be performed in a batch at once, e.g., one can add new tables and directly link them together. 91 | 92 | ## Graph Visualization 93 | 94 | You can visualize the graph at any given point in time by rendering it as a Mermaid entity relationship diagram via the `get_mermaid` tool. 95 | If tables have many columns, it is recommended to set `show_columns` to `False` to avoid cluttering the diagram with less relevant details. 96 | 97 | ## Graph Materialization 98 | 99 | Once a graph is set up, you can materialize the graph to make it ready for model inference operations via the `materialize_graph` tool. 100 | This step creates the relational entity graph, which converts each row into a node, and each foreign key-primary key link into an edge. 101 | Most importantly, it converts the relational data into a data structure from which it can efficiently perform graph traversal and sample subgraphs, which are later used as inputs into the model. 102 | 103 | Any updates to the graph schema will require re-materializing the graph before the KumoRFM model can start making predictions again.
104 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Trigger Release 2 | 3 | description: > 4 | To release `vX.Y.0rc1`, run this workflow on `main`. 5 | To release `vX.Y.0rc{N+1}`, run this workflow on `vX.Y`. 6 | To release `vX.Y.0`, run this workflow on `vX.Y` with `switch-from-rc-to-final` set to `true`. 7 | To release `vX.Y.{Z+1}`, run this workflow on `vX.Y`. 8 | 9 | on: # yamllint disable-line rule:truthy 10 | workflow_dispatch: 11 | inputs: 12 | dry-run: 13 | type: boolean 14 | required: false 15 | default: true 16 | switch-from-rc-to-final: 17 | description: > 18 | Whether to switch from `vX.Y.0rcN` to `vX.Y.0`. Only applicable when 19 | triggering this workflow on vX.Y that has any RC release(s). 20 | type: boolean 21 | required: false 22 | default: false 23 | 24 | jobs: 25 | release: 26 | runs-on: ubuntu-latest 27 | steps: 28 | - uses: actions/checkout@v5 29 | with: 30 | # This token is necessary to push a commit and tag. 
31 | token: ${{ secrets.KUMO_GITHUB_BOT_TOKEN }} 32 | fetch-depth: 0 # for verbosity 33 | fetch-tags: true # for verbosity 34 | 35 | - name: Configure release 36 | id: config 37 | run: | 38 | if [[ ${{ github.ref_name }} == main ]]; then 39 | echo "create-rc-from-dev=true" >> $GITHUB_OUTPUT 40 | elif [[ ${{ github.ref_name }} =~ ^v[0-9]+\.[0-9]+$ ]]; then 41 | if [[ ${{ inputs.switch-from-rc-to-final }} == true ]]; then 42 | echo "create-final-from-rc=true" >> $GITHUB_OUTPUT 43 | else 44 | is_rc=$(python -c 'exec(input()); print("rc" in __version__)' < kumo_rfm_mcp/_version.py) 45 | if [[ $is_rc == "True" ]]; then 46 | echo "create-rc-from-rc=true" >> $GITHUB_OUTPUT 47 | else 48 | echo "create-final-from-final=true" >> $GITHUB_OUTPUT 49 | fi 50 | fi 51 | else 52 | echo "Error: Unexpected branch name: ${{ github.ref_name }}" 53 | exit 1 54 | fi 55 | 56 | - uses: fregante/setup-git-user@024bc0b8e177d7e77203b48dab6fb45666854b35 # v2.0.2 57 | 58 | - name: Set up Node.js 59 | uses: actions/setup-node@v4 60 | 61 | - name: Create vX.Y and vX.Y.0rc1 from vX.Y.0.dev0 on main 62 | if: ${{ steps.config.outputs.create-rc-from-dev == 'true' }} 63 | run: | 64 | # Get the release candidate version: 65 | # - version_dev: X.Y.0.dev0 66 | # - version_rc: X.Y.0rc1 67 | # - version_final: X.Y.0 68 | version_dev=$(python -c 'exec(input()); print(__version__)' < kumo_rfm_mcp/_version.py) # X.Y.0.dev0 69 | version_rc=$(npx semver --coerce $version_dev)rc1 # X.Y.0.dev0 -> X.Y.0rc1 70 | version_final=$(npx semver --coerce $version_dev) # X.Y.0.dev0 -> X.Y.0 71 | 72 | # Update the version in kumo_rfm_mcp/_version.py 73 | echo "__version__ = '$version_rc'" > kumo_rfm_mcp/_version.py 74 | git diff --color=always 75 | git add kumo_rfm_mcp/_version.py 76 | git commit -m "Update version from $version_dev to $version_rc (${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" 77 | 78 | # Create a release tag: 79 | git tag v$version_rc # vX.Y.0rc1 80 | 81 | # Create a new release 
branch vX.Y on the commit right before it 82 | # starts diverging from main: 83 | git checkout -b v${version_final%.*} # X.Y.0 -> vX.Y (strips only the patch component, so 1.0.0 -> v1.0) 84 | 85 | # Update the version in kumo_rfm_mcp/_version.py again in main (vX.Y.0rc1 -> vX.{Y+1}.0.dev0) 86 | git checkout ${{ github.ref_name }} # This is normally main, but it's set to ref_name for dry-run. 87 | version_dev_new=$(npx semver -i minor $version_final).dev0 # X.Y.0 -> X.{Y+1}.0.dev0 88 | echo "__version__ = '$version_dev_new'" > kumo_rfm_mcp/_version.py 89 | git diff --color=always 90 | git add kumo_rfm_mcp/_version.py 91 | git commit -m "Update version from $version_rc to $version_dev_new (${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" 92 | 93 | # Push updated main branch, the new vX.Y branch, and vX.Y.0rc1 tag 94 | # ref_name is main here: this step only runs when `Configure release` saw the main branch. 95 | echo UPDATED_REFS="${{ github.ref_name }} v${version_final%.*} v$version_rc" >> $GITHUB_ENV 96 | 97 | - name: Create vX.Y.0rc{N+1} from vX.Y.0rcN on vX.Y 98 | if: ${{ steps.config.outputs.create-rc-from-rc == 'true' }} 99 | run: | 100 | # Get the release candidate version: 101 | # - version_rc: X.Y.0rcN 102 | # - version_rc_new: X.Y.0rc{N+1} 103 | # - version_final: X.Y.0 104 | version_rc=$(python -c 'exec(input()); print(__version__)' < kumo_rfm_mcp/_version.py) # X.Y.0rcN 105 | version_final=$(npx semver --coerce $version_rc) # X.Y.0rcN -> X.Y.0 106 | n=${version_rc##*rc} # N 107 | version_rc_new=${version_final}rc$((n + 1)) # X.Y.0rc{N+1} 108 | 109 | # Update the version in kumo_rfm_mcp/_version.py 110 | echo "__version__ = '$version_rc_new'" > kumo_rfm_mcp/_version.py 111 | git diff --color=always 112 | git add kumo_rfm_mcp/_version.py 113 | git commit -m "Update version from $version_rc to $version_rc_new (${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" 114 | 115 | # Create a release tag: 116 | git tag v$version_rc_new # vX.Y.0rc{N+1} 117 | 118 | # Push updated vX.Y branch and vX.Y.0rc{N+1} tag 
119 | echo UPDATED_REFS="v${version_final%.*} v$version_rc_new" >> $GITHUB_ENV 120 | 121 | - name: Create vX.Y.0 from vX.Y.0rcN on vX.Y 122 | if: ${{ steps.config.outputs.create-final-from-rc == 'true' }} 123 | run: | 124 | # Get the final release version: 125 | # - version_rc: X.Y.0rcN 126 | # - version_final: X.Y.0 127 | version_rc=$(python -c 'exec(input()); print(__version__)' < kumo_rfm_mcp/_version.py) # X.Y.0rcN 128 | version_final=$(npx semver --coerce $version_rc) # X.Y.0rcN -> X.Y.0 129 | 130 | # Update the version in kumo_rfm_mcp/_version.py 131 | echo "__version__ = '$version_final'" > kumo_rfm_mcp/_version.py 132 | git diff --color=always 133 | git add kumo_rfm_mcp/_version.py 134 | git commit -m "Update version from $version_rc to $version_final (${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" 135 | 136 | # Create a release tag: 137 | git tag v$version_final # vX.Y.0 138 | 139 | # Push updated vX.Y branch and vX.Y.0 tag 140 | echo UPDATED_REFS="v${version_final%.*} v$version_final" >> $GITHUB_ENV 141 | 142 | - name: Create vX.Y.{Z+1} from vX.Y.Z on vX.Y 143 | if: ${{ steps.config.outputs.create-final-from-final == 'true' }} 144 | run: | 145 | # Get the next patch release version: 146 | # - version_final: X.Y.Z 147 | # - version_final_new: X.Y.{Z+1} 148 | version_final=$(python -c 'exec(input()); print(__version__)' < kumo_rfm_mcp/_version.py) 149 | version_final=$(npx semver --coerce $version_final) # X.Y.Z -> X.Y.Z (just in case) 150 | version_final_new=$(npx semver -i patch $version_final) # X.Y.Z -> X.Y.{Z+1} 151 | 152 | # Update the version in kumo_rfm_mcp/_version.py: 153 | echo "__version__ = '$version_final_new'" > kumo_rfm_mcp/_version.py 154 | git diff --color=always 155 | git add kumo_rfm_mcp/_version.py 156 | git commit -m "Update version from $version_final to $version_final_new (${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})" 157 | 158 | # Create a release tag: 159 | git tag v$version_final_new # 
vX.Y.{Z+1} 160 | 161 | # Push updated vX.Y branch and vX.Y.{Z+1} tag 162 | echo UPDATED_REFS="${{ github.ref_name }} v$version_final_new" >> $GITHUB_ENV 163 | 164 | - name: Push changes to the remote repository 165 | run: | 166 | # For verbosity, show git tags and git logs: 167 | git tag --list --sort=-v:refname 168 | for ref in ${{ env.UPDATED_REFS }}; do 169 | echo "git log on $ref:" 170 | git log -n 10 --oneline --decorate $ref 171 | done 172 | 173 | if [[ ${{ inputs.dry-run }} == false ]]; then 174 | for ref in ${{ env.UPDATED_REFS }}; do 175 | git push origin $ref 176 | done 177 | fi 178 | -------------------------------------------------------------------------------- /kumo_rfm_mcp/config.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import Annotated, Any 3 | 4 | from kumoapi.rfm import Explanation 5 | from kumoapi.typing import Dtype, Stype 6 | from pydantic import BaseModel, Field 7 | 8 | 9 | class TableSource(BaseModel): 10 | """Source information of a table.""" 11 | path: Annotated[ 12 | Path, 13 | "Path to a local file. Only CSV or Parquet files are supported.", 14 | ] 15 | bytes: Annotated[int, "Size in bytes of the file"] 16 | 17 | 18 | class TableSourcePreview(BaseModel): 19 | """Preview of the first rows of a table-like file.""" 20 | rows: Annotated[ 21 | list[dict[str, Any]], 22 | Field( 23 | default_factory=list, 24 | description=("Each row in the table source is represented as a " 25 | "dictionary mapping column names to their " 26 | "corresponding values."), 27 | ), 28 | ] 29 | 30 | 31 | class TableMetadata(BaseModel): 32 | """Metadata for a table.""" 33 | path: Annotated[ 34 | str, 35 | ("Path to the table. 
Can be a local file path, an S3 URI " 36 | "(s3://...), or an HTTP/HTTPS URL."), 37 | ] 38 | name: Annotated[str, "Name of the table"] 39 | num_rows: Annotated[int, "Number of rows in the table"] 40 | dtypes: Annotated[ 41 | dict[str, Dtype], 42 | "Column names mapped to their data types", 43 | ] 44 | stypes: Annotated[ 45 | dict[str, Stype | None], 46 | "Column names mapped to their semantic types or `None` if they have " 47 | "been discarded", 48 | ] 49 | primary_key: Annotated[str | None, "Name of the primary key column"] 50 | time_column: Annotated[ 51 | str | None, 52 | "Name of the time column marking when the record becomes active", 53 | ] 54 | end_time_column: Annotated[ 55 | str | None, 56 | ("Name of the end time column marking when the record stops being " 57 | "active"), 58 | ] 59 | 60 | 61 | class AddTableMetadata(BaseModel): 62 | """Metadata to add a new table.""" 63 | path: Annotated[ 64 | str, 65 | ("Path to the table. Can be a local file path, an S3 URI " 66 | "(s3://...), or an HTTP/HTTPS URL."), 67 | ] 68 | name: Annotated[str, "Name of the table"] 69 | primary_key: Annotated[ 70 | str | None, 71 | Field( 72 | default=None, 73 | description="Name of the primary key column", 74 | ), 75 | ] 76 | time_column: Annotated[ 77 | str | None, 78 | Field( 79 | default=None, 80 | description=("Name of the time column marking when the record " 81 | "becomes active"), 82 | ), 83 | ] 84 | end_time_column: Annotated[ 85 | str | None, 86 | Field( 87 | default=None, 88 | description=("Name of the end time column marking when the record " 89 | "stops being active"), 90 | ), 91 | ] 92 | 93 | 94 | class UpdateTableMetadata(BaseModel): 95 | """Metadata updates to perform for a table.""" 96 | stypes: Annotated[ 97 | dict[str, Stype | None], 98 | Field( 99 | default_factory=dict, 100 | description=("Update the semantic type of column names. Set to " 101 | "`None` if the column should be discarded. 
Omitted " 102 | "columns will be untouched."), 103 | ), 104 | ] 105 | primary_key: Annotated[ 106 | str | None, 107 | Field( 108 | default=None, 109 | description=("Update the primary key column. Set to `None` if the " 110 | "primary key should be discarded. If omitted, the " 111 | "current primary key will be untouched."), 112 | ), 113 | ] 114 | time_column: Annotated[ 115 | str | None, 116 | Field( 117 | default=None, 118 | description=("Update the time column. Set to `None` if the time " 119 | "column should be discarded. If omitted, the current " 120 | "time column will be untouched."), 121 | ), 122 | ] 123 | end_time_column: Annotated[ 124 | str | None, 125 | Field( 126 | default=None, 127 | description=("Update the end time column. Set to `None` if the " 128 | "end time column should be discarded. If omitted, " 129 | "the current end time column will be untouched."), 130 | ), 131 | ] 132 | 133 | 134 | class LinkMetadata(BaseModel): 135 | """Metadata for defining a link between two tables via foreign key-primary 136 | key relationships.""" 137 | source_table: Annotated[ 138 | str, 139 | "Name of the source table containing the foreign key", 140 | ] 141 | foreign_key: Annotated[str, "Name of the foreign key column"] 142 | destination_table: Annotated[ 143 | str, 144 | "Name of the destination table containing the primary key to link to", 145 | ] 146 | 147 | 148 | class GraphMetadata(BaseModel): 149 | """Metadata of a graph holding multiple tables connected via foreign 150 | key-primary key relationships.""" 151 | tables: Annotated[list[TableMetadata], "List of tables"] 152 | links: Annotated[list[LinkMetadata], "List of links"] 153 | 154 | 155 | class UpdateGraphMetadata(BaseModel): 156 | """Metadata updates to perform for a graph holding multiple tables 157 | connected via foreign key-primary key relationships.""" 158 | tables_to_add: Annotated[ 159 | list[AddTableMetadata], 160 | Field(default_factory=list, description="Tables to add"), 161 | ] 162 | 
tables_to_update: Annotated[ 163 | dict[str, UpdateTableMetadata], 164 | Field( 165 | default_factory=dict, 166 | description="Tables to update. Omitted tables will be untouched.", 167 | ), 168 | ] 169 | links_to_remove: Annotated[ 170 | list[LinkMetadata], 171 | Field(default_factory=list, description="Links to remove"), 172 | ] 173 | links_to_add: Annotated[ 174 | list[LinkMetadata], 175 | Field(default_factory=list, description="Links to add"), 176 | ] 177 | tables_to_remove: Annotated[ 178 | list[str], 179 | Field(default_factory=list, description="Tables to remove"), 180 | ] 181 | 182 | 183 | class UpdatedGraphMetadata(BaseModel): 184 | """Updated metadata of a graph holding multiple tables connected via 185 | foreign key-primary key relationships.""" 186 | graph: Annotated[GraphMetadata, "Updated graph metadata"] 187 | errors: Annotated[ 188 | list[str], 189 | Field( 190 | default_factory=list, 191 | description="Any errors encountered during the update process", 192 | ), 193 | ] 194 | 195 | 196 | class MaterializedGraphInfo(BaseModel): 197 | """Information about the materialized graph.""" 198 | num_nodes: Annotated[int, "Number of nodes in the graph"] 199 | num_edges: Annotated[int, "Number of edges in the graph"] 200 | time_ranges: Annotated[ 201 | dict[str, str], 202 | Field( 203 | default_factory=dict, 204 | description=("Earliest to latest timestamp for each table in the " 205 | "graph that contains a time column"), 206 | ), 207 | ] 208 | 209 | 210 | class PredictResponse(BaseModel): 211 | predictions: Annotated[ 212 | list[dict[str, Any]], 213 | Field( 214 | default_factory=list, 215 | description=( 216 | "The predictions, where each row holds information about the " 217 | "entity, the anchor time, and the prediction scores"), 218 | ), 219 | ] 220 | logs: Annotated[ 221 | list[str], 222 | Field( 223 | default_factory=list, 224 | description=("Prediction-specific log messages such as number of " 225 | "context examples, the underlying task type and 
the " 226 | "label distribution"), 227 | ), 228 | ] 229 | 230 | 231 | class EvaluateResponse(BaseModel): 232 | metrics: Annotated[ 233 | dict[str, float | None], 234 | Field( 235 | default_factory=dict, 236 | description="The metric value for every metric", 237 | ), 238 | ] 239 | logs: Annotated[ 240 | list[str], 241 | Field( 242 | default_factory=list, 243 | description=("Evaluation-specific log messages such as number of " 244 | "context and test examples, the underlying task type " 245 | "and the label distribution"), 246 | ), 247 | ] 248 | 249 | 250 | class ExplanationResponse(BaseModel): 251 | prediction: Annotated[ 252 | dict[str, Any], 253 | ("The prediction, holding information about the entity, the anchor " 254 | "time, and the prediction scores"), 255 | ] 256 | explanation: Annotated[ 257 | Explanation, 258 | ("The explanation of the prediction. Provides both a global, " 259 | "column-level analysis and a local, cell-level attribution view. " 260 | "The global analysis clusters column distributions of in-context " 261 | "examples into cohorts and relates them to their relevance with " 262 | "respect to ground-truth labels. The local view computes " 263 | "gradient-based attribution scores over prediction subgraphs. " 264 | "Together, these views enable comprehensive interpretation."), 265 | ] 266 | logs: Annotated[ 267 | list[str], 268 | Field( 269 | default_factory=list, 270 | description=("Prediction-specific log messages such as number of " 271 | "context examples, the underlying task type and the " 272 | "label distribution"), 273 | ), 274 | ] 275 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
8 | KumoRFM • 9 | Notebooks • 10 | Blog • 11 | Get an API key 12 |
13 | 14 | [](https://pypi.org/project/kumo-rfm-mcp/) 15 | [](https://pypi.org/project/kumo-rfm-mcp/) 16 | [](https://join.slack.com/t/kumoaibuilders/shared_invite/zt-2z9uih3lf-fPM1z2ACZg~oS3ObmiQLKQ) 17 | 18 | 🔬 MCP server to query [KumoRFM](https://kumorfm.ai) in your agentic flows 19 | 20 |
68 |
69 | The MCP Bundle supports Linux, macOS, and Windows. It requires a Python executable to be available on the system, which it uses to create a separate virtual environment.
70 |
71 | ### Claude Code
72 |
73 | To add the server to Claude Code, run:
74 |
75 | ```
76 | claude mcp add --transport stdio kumo-rfm-mcp --env KUMO_API_KEY=|
92 |
93 | 96 | [Example] 97 | |
98 | |
117 |
|---|---|
|
120 |
121 | 128 | [Example] 129 | |
130 | |
147 |
|
150 |
151 | 158 | [Example] 159 | |
160 | |
175 |
|
178 |
179 | |
186 | |
207 |