├── src
│   └── mcp-rdf-explorer
│       ├── __init__.py
│       └── server.py
├── requirements.txt
├── LICENSE
├── test_rdf_explorer.py
└── README.md

--------------------------------------------------------------------------------
/src/mcp-rdf-explorer/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
mcp
rdflib[sparql]
requests
feedparser
tiktoken
pytest
pytest-asyncio

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 Emeka Azuka Okoye

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/test_rdf_explorer.py:
--------------------------------------------------------------------------------
# Run with: pytest test_rdf_explorer.py -v

import pytest
import rdflib
from unittest.mock import AsyncMock, patch

from server import triplestore_lifespan, get_mode, FastMCP

# Fixtures
@pytest.fixture
def mock_mcp():
    mcp = FastMCP("Test")
    mcp._lifespan_context = {
        "active_external_endpoint": None,
        "triple_file": "test.ttl",
        "sparql_endpoint": ""
    }
    return mcp

@pytest.fixture
def mock_context(mock_mcp):
    class MockRequestContext:
        lifespan_context = mock_mcp._lifespan_context
    class MockContext:
        request_context = MockRequestContext()
    return MockContext()

# Mode-reporting tests
def test_get_mode_local(mock_context):
    result = get_mode(mock_context)
    assert result == "Local File Mode with Dataset: 'test.ttl'"

def test_get_mode_sparql(mock_context):
    mock_context.request_context.lifespan_context["active_external_endpoint"] = "https://dbpedia.org/sparql"
    mock_context.request_context.lifespan_context["sparql_endpoint"] = "https://dbpedia.org/sparql"
    result = get_mode(mock_context)
    assert result == "SPARQL Endpoint Mode with Endpoint: 'https://dbpedia.org/sparql'"

# Lifespan tests
@pytest.mark.asyncio
async def test_triplestore_lifespan_local():
    mock_server = AsyncMock()
    with patch("rdflib.Graph.parse") as mock_parse:
        mock_parse.return_value = rdflib.Graph()
        async with triplestore_lifespan(mock_server, "test.ttl", "") as context:
            assert context["triple_file"] == "test.ttl"
            assert context["active_external_endpoint"] is None
            assert isinstance(context["graph"], rdflib.Graph)

@pytest.mark.asyncio
async def test_triplestore_lifespan_sparql():
    mock_server = AsyncMock()
    with patch("rdflib.plugins.stores.sparqlstore.SPARQLStore.query") as mock_query:
        mock_query.return_value = []
        async with triplestore_lifespan(mock_server, "test.ttl", "https://dbpedia.org/sparql") as context:
            assert context["sparql_endpoint"] == "https://dbpedia.org/sparql"
            assert context["active_external_endpoint"] == "https://dbpedia.org/sparql"
            assert "graph" in context

# Failure-path tests
@pytest.mark.asyncio
async def test_triplestore_lifespan_sparql_failure():
    mock_server = AsyncMock()
    with patch("rdflib.plugins.stores.sparqlstore.SPARQLStore.query") as mock_query:
        mock_query.side_effect = Exception("Connection failed")
        with pytest.raises(Exception, match="Connection failed"):
            async with triplestore_lifespan(mock_server, "test.ttl", "http://invalid.endpoint"):
                pass

@pytest.mark.asyncio
async def test_triplestore_lifespan_local_file_missing():
    mock_server = AsyncMock()
    with patch("rdflib.Graph.parse") as mock_parse:
        mock_parse.side_effect = FileNotFoundError("File not found")
        with pytest.raises(FileNotFoundError, match="File not found"):
            async with triplestore_lifespan(mock_server, "missing.ttl", ""):
                pass

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# RDF Explorer v1.0.0

## Overview
A Model Context Protocol (MCP) server that provides a conversational interface for exploring and analyzing RDF (Turtle) based knowledge graphs, in either Local File mode or SPARQL Endpoint mode. The server mediates between AI applications (hosts/clients) and RDF data, enabling graph exploration and analysis through SPARQL queries. It is a practical tool for knowledge graph research and AI data preparation.


## Components

### Tools
The server implements the following SPARQL query and search tools:

- `execute_on_endpoint`
  - Execute a SPARQL query directly on an external endpoint
  - Input:
    - `endpoint` (str): The SPARQL endpoint URL to query.
    - `query` (str): The SPARQL query to execute.
    - `ctx` (Context): The FastMCP context object.
  - Returns: Query results as a newline-separated string, or an error message.

- `sparql_query`
  - Execute a SPARQL query on the current graph or active external endpoint
  - Input:
    - `query` (str): The SPARQL query to execute.
    - `ctx` (Context): The FastMCP context object.
    - `use_service` (bool): Whether to use a SERVICE clause for federated queries in local mode (default: True).
  - Returns: Query results as a newline-separated string, or an error message.

- `graph_stats`
  - Calculate and return statistics about the graph in JSON format
  - Input:
    - `ctx` (Context): The FastMCP context object.
  - Returns: JSON string containing graph statistics (e.g., triple count, unique subjects).

- `count_triples`
  - Count triples in the graph. Disabled in SPARQL Endpoint Mode; use a custom query instead.
  - Input:
    - `ctx` (Context): The FastMCP context object.
  - Returns: Number of triples as a string, or an error message.

- `full_text_search`
  - Perform a full-text search on the graph or endpoint, avoiding proprietary syntax.
  - Input:
    - `search_term` (str): The term to search for.
    - `ctx` (Context): The FastMCP context object.
  - Returns: Search results as a newline-separated string, or an error message.

- `health_check`
  - Check the health of the triplestore connection.
  - Input:
    - `ctx` (Context): The FastMCP context object.
  - Returns: 'Healthy' if the connection is good, 'Unhealthy: <error>' otherwise.

- `get_mode`
  - Get the current mode of RDF Explorer. Useful for verifying which data source is active.
  - Input:
    - `ctx` (Context): The FastMCP context object.
  - Returns: A message indicating the mode and dataset or endpoint.


### Resources

The server exposes the following resources:
- `schema://all`: Retrieve schema information (classes and properties) from the graph.
  - Returns: A newline-separated string of schema elements (classes and properties).

- `queries://{template_name}`: Retrieve a predefined SPARQL query template by name.
  - Returns: The SPARQL query string, or 'Template not found'.

- `explore://{query_name}`: Execute an exploratory SPARQL query by name and return results in JSON.
  - `query_name` (str): The name of the exploratory query (e.g., 'classes', 'relationships/URI').
  - Returns: JSON string of query results.

- `explore://report`: Generate a Markdown report of exploratory queries.
  - Returns: A Markdown-formatted report string.
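
For reference, here is a minimal standalone sketch (not part of the server) of the kind of query the `explore://classes` resource runs; it assumes only `rdflib` and a hypothetical local file `sample.ttl`:

```python
# Standalone sketch: the same 'classes' exploratory query the server ships,
# run directly with rdflib. 'sample.ttl' is a hypothetical Turtle file.
import rdflib

graph = rdflib.Graph()
graph.parse("sample.ttl", format="turtle")

query = """
SELECT DISTINCT ?type ?label
WHERE { ?s a ?type . OPTIONAL { ?type rdfs:label ?label } }
LIMIT 100
"""

for row in graph.query(query):
    print(row.type, row.label)
```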


### Prompts

The server exposes the following prompts:
- `analyze_graph_structure`: Initiate an analysis of the graph structure with schema data.
  - Returns: A list of messages to guide graph structure analysis.

- `find_relationships`: Generate a SPARQL query to find relationships for a given subject.
  - Returns: A SPARQL query string to find relationships.

- `text_to_sparql`: Convert a text prompt to a SPARQL query and execute it, with token limit checks.
  - `prompt` (str): The text prompt to convert to SPARQL.
  - Returns: Query results with usage stats, or an error message.


## Setup and Configuration

### Installing on Claude Desktop
Before starting, make sure [Claude Desktop](https://claude.ai/download) is installed.

1. Go to: Settings > Developer > Edit Config

2. Add the following to your `claude_desktop_config.json`:
   On macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
   On Windows: `%APPDATA%/Claude/claude_desktop_config.json`

To use a local RDF Turtle file, pass the `--triple-file` argument:
```json
{
  "mcpServers": {
    "rdf_explorer": {
      "command": "C:\\path\\to\\venv\\Scripts\\python.exe",
      "args": ["C:\\path\\to\\server.py", "--triple-file", "your_file.ttl"]
    }
  }
}
```

To use a SPARQL endpoint, pass the `--sparql-endpoint` argument:
```json
{
  "mcpServers": {
    "rdf_explorer": {
      "command": "C:\\path\\to\\venv\\Scripts\\python.exe",
      "args": ["C:\\path\\to\\server.py", "--sparql-endpoint", "https://example.com/sparql"]
    }
  }
}
```

3. Restart Claude Desktop and start querying and exploring graph data.

4. Try the prompt: "what mode is RDF Explorer running in?"


## Usage Examples

Here are examples of how you can explore RDF data using natural language:

### Querying Data in Local File Mode

You can ask questions like:
- "Show me all employees in the Sales department"
- "Find the top 5 oldest customers"
- "Who has purchased more than 3 products in the last month?"
- "List all entities"
- "Using the DBpedia endpoint, list 10 songs by Michael Jackson"
- "Using the Wikidata endpoint, list 5 cities"
- "count the triples"
- "analyze the graph structure"
- "Select ..."
- "search '{text}'"
- "find relationships of '{URI}'"
- "what mode is RDF Explorer running in?"

### Querying Data in SPARQL Endpoint Mode

You can ask questions like:
- "Using the DBpedia endpoint, list 10 songs by Michael Jackson"
- "Using the Wikidata endpoint, list 5 cities"
- "Select ..."
- "search '{text}'"
- "find relationships of '{URI}'"
- "what mode is RDF Explorer running in?"
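
### Querying Programmatically

Beyond Claude Desktop, any MCP client can drive the server. Below is a minimal client sketch using the official `mcp` Python SDK over stdio; the interpreter, server path, and Turtle file are placeholders to adapt to your environment:

```python
# Hypothetical client-side sketch; paths and file names are placeholders.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main():
    params = StdioServerParameters(
        command="python",
        args=["server.py", "--triple-file", "your_file.ttl"],
    )
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Ask the server which mode it is running in.
            mode = await session.call_tool("get_mode", {})
            print(mode.content)
            # Run a raw SPARQL query against the local graph.
            result = await session.call_tool(
                "sparql_query",
                {"query": "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10"},
            )
            print(result.content)


asyncio.run(main())
```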

## Development
```
# clone the repository
git clone https://github.com/emekaokoye/mcp-rdf-explorer.git
cd mcp-rdf-explorer

# set up a virtual environment
python -m venv venv
source venv/bin/activate
# on Windows: venv\Scripts\activate

# install development dependencies
pip install -r requirements.txt

# run the tests
pytest test_rdf_explorer.py -v
```

## License

This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the [license](LICENSE) file in the project repository.

--------------------------------------------------------------------------------
/src/mcp-rdf-explorer/server.py:
--------------------------------------------------------------------------------
import argparse
import asyncio
import json
import logging
import os
import sys
import time
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from typing import Any, Dict

import feedparser
import rdflib
import requests
import tiktoken
from mcp.server.fastmcp import FastMCP, Context
from mcp.server.fastmcp.prompts import base

# Configure logging at the start
logger = logging.getLogger(__name__)

if not logger.handlers:
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[logging.StreamHandler(sys.stderr)]
    )

# Check for SPARQLStore availability
try:
    from rdflib.plugins.stores.sparqlstore import SPARQLStore
    HAS_SPARQLSTORE = True
except ImportError:
    HAS_SPARQLSTORE = False
    logger.warning("SPARQLStore not available. SPARQL Endpoint Mode and external queries will be disabled.")

# Parse command-line arguments. parse_known_args() tolerates foreign argv
# entries (for example pytest's options when this module is imported in tests).
parser = argparse.ArgumentParser(description="RDF Explorer MCP Server v1.0.0")
parser.add_argument("--triple-file", default="", help="Path to the local RDF triple file")
parser.add_argument("--sparql-endpoint", default="", help="SPARQL endpoint URL (empty for Local File Mode)")
args, _ = parser.parse_known_args()

logger.info("Starting RDF Explorer MCP Server v1.0.0")
logger.info("Setting lifespan")

# Define the MCP instance as a global so resources and prompts can reach it.
# The lambda defers the call, so triplestore_lifespan may be defined below.
mcp = FastMCP(
    "RDF Explorer",
    dependencies=["rdflib[sparql]", "requests", "feedparser", "tiktoken"],
    lifespan=lambda server: triplestore_lifespan(server, args.triple_file, args.sparql_endpoint)
)

@asynccontextmanager
async def triplestore_lifespan(server: FastMCP, triple_file: str, sparql_endpoint: str) -> AsyncIterator[Dict[str, Any]]:
    """Manage the lifespan of the triplestore, initializing and shutting down the graph connection.

    Args:
        server (FastMCP): The FastMCP server instance.
        triple_file (str): Path to the local RDF triple file.
        sparql_endpoint (str): URL of the SPARQL endpoint, if any.

    Yields:
        Dict[str, Any]: Context dictionary containing the graph, metrics, and other state.

    Raises:
        FileNotFoundError: If the triple file is not found.
        Exception: If connecting to the SPARQL endpoint or parsing the file fails.
68 | """ 69 | logger.info(f"Initializing triplestore with triple_file={triple_file}, sparql_endpoint={sparql_endpoint}") 70 | 71 | metrics = {"queries": 0, "total_time": 0.0} 72 | external_stores = {} 73 | feed_graph = rdflib.Graph() 74 | active_external_endpoint = None 75 | max_tokens = 10000 76 | 77 | if sparql_endpoint and HAS_SPARQLSTORE: 78 | logger.info(f"Connecting to SPARQL endpoint: {sparql_endpoint}") 79 | try: 80 | graph = SPARQLStore(query_endpoint=sparql_endpoint) 81 | graph.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 1") 82 | external_stores[sparql_endpoint] = graph 83 | active_external_endpoint = sparql_endpoint 84 | logger.info(f"Successfully connected to {sparql_endpoint}") 85 | except Exception as e: 86 | logger.error(f"Failed to connect to SPARQL endpoint: {str(e)}") 87 | raise 88 | else: 89 | graph = rdflib.Graph() 90 | file_path = os.path.join(os.path.dirname(__file__), triple_file) 91 | logger.info(f"Loading local RDF file: {file_path}") 92 | try: 93 | graph.parse(file_path, format="turtle") 94 | logger.info(f"Loaded {len(graph)} triples from local file") 95 | except FileNotFoundError: 96 | logger.error(f"RDF file not found: {file_path}") 97 | raise 98 | except Exception as e: 99 | logger.error(f"Failed to load RDF file: {str(e)}") 100 | raise 101 | 102 | try: 103 | logger.info("Triplestore initialized successfully") 104 | yield { 105 | "graph": graph, 106 | "metrics": metrics, 107 | "external_stores": external_stores, 108 | "feed_graph": feed_graph, 109 | "active_external_endpoint": active_external_endpoint, 110 | "max_tokens": max_tokens, 111 | "triple_file": triple_file, 112 | "sparql_endpoint": sparql_endpoint 113 | } 114 | finally: 115 | logger.info("Shutting down triplestore connection") 116 | if sparql_endpoint and HAS_SPARQLSTORE and sparql_endpoint in external_stores: 117 | external_stores[sparql_endpoint].close() 118 | 119 | # Resources 120 | @mcp.resource("graph://{graph_id}") 121 | def get_graph(graph_id: str) -> str: 122 | """Retrieve a graph by ID and serialize it in Turtle format. 123 | 124 | Args: 125 | graph_id (str): Identifier for the graph (currently unused, returns main graph). 126 | 127 | Returns: 128 | str: The serialized graph in Turtle format. 129 | 130 | Raises: 131 | Exception: If serialization fails. 132 | """ 133 | logger.debug(f"Fetching graph for graph_id: {graph_id}") 134 | graph = mcp._lifespan_context["graph"] 135 | try: 136 | if HAS_SPARQLSTORE and isinstance(graph, rdflib.SPARQLStore): 137 | results = graph.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100") 138 | temp_graph = rdflib.Graph() 139 | for s, p, o in results: 140 | temp_graph.add((s, p, o)) 141 | return temp_graph.serialize(format="turtle") 142 | return graph.serialize(format="turtle") 143 | except Exception as e: 144 | logger.error(f"Error serializing graph {graph_id}: {str(e)}") 145 | raise 146 | 147 | @mcp.resource("feed://all") 148 | def get_feed_graph() -> str: 149 | """Retrieve the feed graph stored by explore_url in Turtle format. 150 | 151 | Returns: 152 | str: The serialized feed graph in Turtle format. 153 | 154 | Raises: 155 | Exception: If serialization of the feed graph fails. 
156 | """ 157 | logger.debug("Fetching feed graph") 158 | feed_graph = mcp._lifespan_context["feed_graph"] 159 | try: 160 | return feed_graph.serialize(format="turtle") 161 | except Exception as e: 162 | logger.error(f"Error serializing feed graph: {str(e)}") 163 | raise 164 | 165 | 166 | @mcp.resource("schema://all") 167 | def get_schema() -> str: 168 | """Retrieve schema information (classes and properties) from the graph. 169 | Returns: 170 | str: A newline-separated string of schema elements (classes and properties). 171 | Raises: 172 | Exception: If the schema query fails. 173 | """ 174 | graph = mcp._lifespan_context["graph"] 175 | schema_query = """ 176 | PREFIX rdfs: 177 | PREFIX owl: 178 | PREFIX rdf: 179 | SELECT DISTINCT ?entity ?type 180 | WHERE { 181 | { ?entity a rdfs:Class . BIND("rdfs:Class" as ?type) } UNION 182 | { ?entity a owl:Class . BIND("owl:Class" as ?type) } UNION 183 | { ?entity a rdf:Property . BIND("rdf:Property" as ?type) } UNION 184 | { ?entity a owl:ObjectProperty . BIND("owl:ObjectProperty" as ?type) } UNION 185 | { ?entity a owl:DatatypeProperty . BIND("owl:DatatypeProperty" as ?type) } UNION 186 | { ?entity a owl:AnnotationProperty . BIND("owl:AnnotationProperty" as ?type) } 187 | } 188 | ORDER BY ?type ?entity 189 | LIMIT 100 190 | """ 191 | try: 192 | results = graph.query(schema_query) 193 | return "\n".join(f"{row['type']}: {row['entity']}" for row in results) 194 | except Exception as e: 195 | logger.error(f"Schema query error: {str(e)}") 196 | raise 197 | 198 | 199 | @mcp.resource("queries://{template_name}") 200 | def get_query_template(template_name: str) -> str: 201 | """Retrieve a predefined SPARQL query template by name. 202 | 203 | Args: 204 | template_name (str): The name of the query template (e.g., 'orphans', 'cycles'). 205 | 206 | Returns: 207 | str: The SPARQL query string or 'Template not found' if the name is invalid. 208 | """ 209 | templates = { 210 | "orphans": "SELECT ?s WHERE { ?s ?p ?o . FILTER NOT EXISTS { ?x ?y ?s } } LIMIT 100", 211 | "cycles": "SELECT ?s ?o WHERE { ?s ?p ?o . ?o ?q ?s } LIMIT 100" 212 | } 213 | return templates.get(template_name, "Template not found") 214 | 215 | @mcp.resource("explore://{query_name}") 216 | def exploratory_query(query_name: str) -> str: 217 | """Execute an exploratory SPARQL query by name and return results in JSON. 218 | 219 | Args: 220 | query_name (str): The name of the exploratory query (e.g., 'classes', 'relationships/URI'). 221 | 222 | Returns: 223 | str: JSON string of query results. 224 | 225 | Raises: 226 | Exception: If the query execution fails. 227 | """ 228 | graph = mcp._lifespan_context["graph"] 229 | queries = { 230 | "classes": "SELECT DISTINCT ?type ?label WHERE { ?s a ?type . OPTIONAL { ?type rdfs:label ?label } } LIMIT 100", 231 | "properties": "SELECT DISTINCT ?objprop ?label WHERE { ?objprop a owl:ObjectProperty . OPTIONAL { ?objprop rdfs:label ?label } } LIMIT 100", 232 | "data_properties": "SELECT DISTINCT ?dataprop ?label WHERE { ?dataprop a owl:DatatypeProperty . OPTIONAL { ?dataprop rdfs:label ?label } } LIMIT 100", 233 | "used_properties": "SELECT DISTINCT ?p ?label WHERE { ?s ?p ?o . OPTIONAL { ?p rdfs:label ?label } } LIMIT 100", 234 | "entities": "SELECT DISTINCT ?entity ?elabel ?type ?tlabel WHERE { ?entity a ?type . OPTIONAL { ?entity rdfs:label ?elabel } . 
        "top_predicates": "SELECT ?pred (COUNT(*) as ?triples) WHERE { ?s ?pred ?o } GROUP BY ?pred ORDER BY DESC(?triples) LIMIT 100",
        "class_counts": "SELECT ?class (COUNT(?s) AS ?count) WHERE { ?s a ?class } GROUP BY ?class ORDER BY ?count LIMIT 100",
        "property_counts": "SELECT ?p (COUNT(?s) AS ?count) WHERE { ?s ?p ?o } GROUP BY ?p ORDER BY ?count LIMIT 100"
    }
    if query_name.startswith("relationships/"):
        subject = query_name.split("/", 1)[1]
        query = f"SELECT ?predicate ?object WHERE {{ <{subject}> ?predicate ?object }} LIMIT 100"
    else:
        query = queries.get(query_name, "Query not found")
    try:
        results = graph.query(query)
        # dict(row) is not valid for a ResultRow; asdict() maps variable names to values.
        return json.dumps([{k: str(v) for k, v in row.asdict().items()} for row in results])
    except Exception as e:
        logger.error(f"Exploratory query error: {str(e)}")
        raise

@mcp.resource("explore://report")
def exploratory_report() -> str:
    """Generate a Markdown report of exploratory queries.

    Returns:
        str: A Markdown-formatted report string.

    Raises:
        Exception: If any query in the report generation fails (error included in report).
    """
    report = ["# RDF Exploration Report"]
    for name in ["classes", "used_properties", "top_predicates"]:
        try:
            # Reuse the exploratory queries above; each returns a JSON list of rows.
            rows = json.loads(exploratory_query(name))
            headers = list(rows[0].keys()) if rows else []
            report.append(f"## {name.replace('_', ' ').title()}")
            report.append("| " + " | ".join(headers) + " |")
            report.append("| " + " | ".join(["---"] * len(headers)) + " |")
            for row in rows:
                report.append("| " + " | ".join(str(row.get(h, "")) for h in headers) + " |")
        except Exception as e:
            report.append(f"## {name.replace('_', ' ').title()}\nError: {str(e)}")
    return "\n".join(report)

@mcp.resource("metrics://status")
def get_metrics() -> str:
    """Retrieve server metrics in JSON format.

    Returns:
        str: JSON string containing query count and total execution time.
    """
    metrics = mcp._lifespan_context["metrics"]
    return json.dumps(metrics)

# Tools
@mcp.tool()
def set_max_tokens(tokens: int, ctx: Context) -> str:
    """Set the maximum token limit for prompts.

    Args:
        tokens (int): The new maximum token limit (must be positive).
        ctx (Context): The FastMCP context object.

    Returns:
        str: Confirmation message or error if the value is invalid.
    """
    if tokens <= 0:
        return "Error: MAX_TOKENS must be positive."
    ctx.request_context.lifespan_context["max_tokens"] = tokens
    logger.info(f"Set MAX_TOKENS to {tokens}")
    return f"MAX_TOKENS set to {tokens}"

@mcp.tool()
def execute_on_endpoint(endpoint: str, query: str, ctx: Context) -> str:
    """Execute a SPARQL query directly on an external endpoint.

    Args:
        endpoint (str): The SPARQL endpoint URL to query.
        query (str): The SPARQL query to execute.
        ctx (Context): The FastMCP context object.

    Returns:
        str: Query results as a newline-separated string, or an error message if SPARQLStore is unavailable or the query fails.
    """
    if not HAS_SPARQLSTORE:
        return "SPARQLStore not available. Cannot query external endpoints."
    try:
        # A fresh SPARQLStore is created per call, independent of the lifespan-managed stores.
        store = SPARQLStore(query_endpoint=endpoint)
        results = store.query(query)
        logger.debug(f"Executed query on endpoint {endpoint}: {query}")
        return "\n".join(str(row) for row in results)
    except Exception as e:
        logger.error(f"Direct endpoint query error: {str(e)}")
        return f"Query error: {str(e)}"

@mcp.tool()
def connect_external_triplestore(endpoint: str, ctx: Context) -> str:
    """Connect to an external SPARQL endpoint and optionally set it as active for local mode queries.

    Args:
        endpoint (str): The SPARQL endpoint URL to connect to.
        ctx (Context): The FastMCP context object.

    Returns:
        str: Connection status message.

    Raises:
        Exception: If connecting to the endpoint fails.
    """
    if not HAS_SPARQLSTORE:
        return "SPARQLStore not available. Cannot connect to external endpoints."
    try:
        store = SPARQLStore(query_endpoint=endpoint)
        store.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 1")
        ctx.request_context.lifespan_context["external_stores"][endpoint] = store
        if not ctx.request_context.lifespan_context["active_external_endpoint"]:
            ctx.request_context.lifespan_context["active_external_endpoint"] = endpoint
            logger.info(f"Set active external endpoint to {endpoint} for local mode")
            return f"Connected to {endpoint} and set as active endpoint for local mode queries"
        else:
            logger.info(f"Connected to {endpoint} but not set as active (SPARQL endpoint mode active)")
            return f"Connected to {endpoint} (use SERVICE clause manually in SPARQL endpoint mode)"
    except Exception as e:
        logger.error(f"External triplestore connection error: {str(e)}")
        raise

@mcp.tool()
def sparql_query(query: str, ctx: Context, use_service: bool = True) -> str:
    """Execute a SPARQL query on the current graph or active external endpoint.

    Args:
        query (str): The SPARQL query to execute.
        ctx (Context): The FastMCP context object.
        use_service (bool): Whether to use a SERVICE clause for federated queries in local mode (default: True).

    Returns:
        str: Query results as a newline-separated string, or an error message if the query fails.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    sparql_endpoint = ctx.request_context.lifespan_context["sparql_endpoint"]
    active_external_endpoint = ctx.request_context.lifespan_context["active_external_endpoint"]
    start_time = time.time()
    try:
        if not sparql_endpoint and active_external_endpoint and use_service:
            # Local File Mode with an active external endpoint: wrap the query in a
            # SERVICE clause so rdflib federates it to that endpoint.
            wrapped_query = f"SELECT ?s WHERE {{ SERVICE <{active_external_endpoint}> {{ {query} }} }}"
            logger.debug(f"Executing federated query in local mode: {wrapped_query}")
            results = graph.query(wrapped_query)
        else:
            logger.debug(f"Executing query directly: {query}")
            results = graph.query(query)
        ctx.request_context.lifespan_context["metrics"]["queries"] += 1
        ctx.request_context.lifespan_context["metrics"]["total_time"] += time.time() - start_time
        return "\n".join(str(row) for row in results)
    except Exception as e:
        logger.error(f"SPARQL query error: {str(e)}")
        return f"Query error: {str(e)}"

@mcp.tool()
def explore_url(url: str, ctx: Context) -> str:
    """Extract triples from an RSS/OPML feed URL and store them in the feed graph.

    In local mode, the entries are also merged into the main graph. Only the first
    five feed entries are ingested, each stored as a single (link, title) triple.

    Args:
        url (str): The URL of the feed to explore (e.g., 'http://rss.cnn.com/rss/cnn_topstories.rss').
        ctx (Context): The FastMCP context object.

    Returns:
        str: A message indicating the number of entries added.

    Raises:
        Exception: If fetching or parsing the feed fails.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    feed_graph = ctx.request_context.lifespan_context["feed_graph"]
    try:
        response = requests.get(url)
        feed = feedparser.parse(response.content)
        for entry in feed.entries[:5]:
            feed_graph.add((rdflib.URIRef(entry.link), rdflib.URIRef("http://example.org/title"), rdflib.Literal(entry.title)))
        if not (HAS_SPARQLSTORE and isinstance(graph, SPARQLStore)):
            # Local graphs are writable, so mirror the feed triples into the main graph.
            for triple in feed_graph:
                graph.add(triple)
        return f"Added {len(feed.entries[:5])} entries from {url} to feed_graph"
    except Exception as e:
        logger.error(f"Explore URL error: {str(e)}")
        raise

@mcp.tool()
def graph_stats(ctx: Context) -> str:
    """Calculate and return statistics about the graph in JSON format.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        str: JSON string containing graph statistics (e.g., triple count, unique subjects).

    Raises:
        Exception: If querying or calculating stats fails.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    try:
        if HAS_SPARQLSTORE and isinstance(graph, SPARQLStore):
            # Remote stores: sample via SPARQL rather than iterating the whole graph.
            stats = {
                "unique_subjects": len(set(graph.query("SELECT DISTINCT ?s WHERE { ?s ?p ?o } LIMIT 1000"))),
                "unique_predicates": len(set(graph.query("SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT 1000"))),
                "unique_objects": len(set(graph.query("SELECT DISTINCT ?o WHERE { ?s ?p ?o } LIMIT 1000"))),
                "class_freq": dict(graph.query("SELECT ?class (COUNT(?s) AS ?count) WHERE { ?s a ?class } GROUP BY ?class LIMIT 100"))
            }
        else:
            stats = {
                "triple_count": len(graph),
                "unique_subjects": len(set(s for s, _, _ in graph)),
                "unique_predicates": len(set(p for _, p, _ in graph)),
                "unique_objects": len(set(o for _, _, o in graph)),
                "class_freq": dict(graph.query("SELECT ?class (COUNT(?s) AS ?count) WHERE { ?s a ?class } GROUP BY ?class"))
            }
        return json.dumps(stats)
    except Exception as e:
        logger.error(f"Graph stats error: {str(e)}")
        raise

@mcp.tool()
def count_triples(ctx: Context) -> str:
    """Count triples in the graph. Disabled in SPARQL Endpoint Mode; use a custom SPARQL query instead.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        str: Number of triples as a string, or an error message if counting fails or in SPARQL mode.
    """
    # len() is only meaningful for local graphs, so bail out in SPARQL Endpoint Mode.
    if ctx.request_context.lifespan_context["sparql_endpoint"]:
        return "Error: count_triples is not supported in SPARQL Endpoint Mode. Write a custom SPARQL query to count triples."
    graph = ctx.request_context.lifespan_context["graph"]
    try:
        return str(len(graph))
    except Exception as e:
        return f"Error counting triples: {str(e)}"

@mcp.tool()
def full_text_search(search_term: str, ctx: Context) -> str:
    """Perform a full-text search on the graph or endpoint, avoiding proprietary syntax.

    Args:
        search_term (str): The term to search for.
        ctx (Context): The FastMCP context object.

    Returns:
        str: Search results as a newline-separated string, or an error message if the search fails.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    # Escape double quotes so the term cannot break out of the REGEX string literal.
    safe_term = search_term.replace('"', '\\"')
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT DISTINCT ?s ?label
    WHERE {{
        ?s ?p ?o .
        FILTER(REGEX(STR(?o), "{safe_term}", "i"))
        OPTIONAL {{ ?s rdfs:label ?label }}
    }} LIMIT 100
    """
    try:
        results = graph.query(query)
        return "\n".join(str(row) for row in results)
    except Exception as e:
        logger.error(f"Full-text search error: {str(e)}")
        return f"Error: {str(e)}"

@mcp.tool()
def health_check(ctx: Context) -> str:
    """Check the health of the triplestore connection.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        str: 'Healthy' if the connection is good, 'Unhealthy: <error>' otherwise.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    try:
        graph.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 1")
        return "Healthy"
    except Exception as e:
        logger.error(f"Health check error: {str(e)}")
        return f"Unhealthy: {str(e)}"

@mcp.tool()
def get_mode(ctx: Context) -> str:
    """Get the current mode of RDF Explorer. Useful for verifying which data source is active.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        str: A message indicating the mode and dataset or endpoint.
    """
    triple_file = ctx.request_context.lifespan_context["triple_file"]
    sparql_endpoint = ctx.request_context.lifespan_context["sparql_endpoint"]
    if sparql_endpoint:
        return f"SPARQL Endpoint Mode with Endpoint: '{sparql_endpoint}'"
    else:
        return f"Local File Mode with Dataset: '{triple_file}'"

# Prompts
@mcp.prompt()
def analyze_graph_structure(ctx: Context) -> list[base.Message]:
    """Initiate an analysis of the graph structure with sample schema data.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        list[base.Message]: A list of messages to guide graph structure analysis.

    Raises:
        Exception: If retrieving the schema fails.
    """
    try:
        schema = get_schema()
        source = "DBpedia" if ctx.request_context.lifespan_context["active_external_endpoint"] else "local triples"
        return [
            base.UserMessage(f"Please analyze the structure of the {source} graph."),
            base.UserMessage(f"Here's a sample schema:\n{schema}"),
            base.AssistantMessage("What specific aspects would you like me to focus on?")
        ]
    except Exception as e:
        logger.error(f"Analyze graph structure error: {str(e)}")
        raise

@mcp.prompt()
def find_relationships(subject: str) -> str:
    """Generate a SPARQL query to find relationships for a given subject.

    Args:
        subject (str): The URI of the subject to query relationships for.

    Returns:
        str: A SPARQL query string to find relationships.
    """
    return f"""
    Using the SPARQL query tool, find all relationships for the subject <{subject}>:
    SELECT ?predicate ?object WHERE {{ <{subject}> ?predicate ?object }} LIMIT 100
    """

@mcp.prompt()
def graph_visualization(subject: str) -> list[base.Message]:
    """Generate a DOT visualization of the graph around a subject, capped at 50 outgoing triples.

    Args:
        subject (str): The URI of the subject to visualize.

    Returns:
        list[base.Message]: A list of messages containing the DOT graph.

    Raises:
        Exception: If querying the graph for visualization fails.
    """
    graph = mcp._lifespan_context["graph"]
    try:
        dot = ["digraph G {"]
        results = graph.query(f"SELECT ?p ?o WHERE {{ <{subject}> ?p ?o }} LIMIT 50")
        for row in results:
            dot.append(f'"{subject}" -> "{row["o"]}" [label="{row["p"]}"];')
        dot.append("}")
        return [
            base.UserMessage(f"Visualize the graph around <{subject}>"),
            base.AssistantMessage("\n".join(dot) + "\n\nUse Graphviz (dot -Tpng) to render this DOT format.")
        ]
    except Exception as e:
        logger.error(f"Graph visualization error: {str(e)}")
        raise

@mcp.prompt()
def text_to_sparql(prompt: str, ctx: Context) -> str:
    """Convert a text prompt to a SPARQL query and execute it, with token limit checks.

    Args:
        prompt (str): The text prompt to convert to SPARQL.
        ctx (Context): The FastMCP context object.

    Returns:
        str: Query results with usage stats, or an error message if execution fails or token limits are exceeded.
    """
    encoder = tiktoken.get_encoding("gpt2")
    start_time = time.time()
    # Placeholder translation: a real implementation would derive the endpoint and
    # query from the prompt (e.g., via an LLM call).
    grok_response = {"endpoint": None, "query": "SELECT ?s WHERE { ?s ?p ?o } LIMIT 1"}
    endpoint = grok_response.get("endpoint")
    query = grok_response["query"]
    logger.debug(f"Prompt received: {prompt}")
    input_tokens = len(encoder.encode(prompt + query))
    max_tokens = ctx.request_context.lifespan_context["max_tokens"]
    if input_tokens > max_tokens:
        logger.debug(f"Token limit exceeded: {input_tokens} > {max_tokens}")
        return f"Error: Input exceeds token limit ({input_tokens} tokens > {max_tokens}). Shorten your prompt or increase MAX_TOKENS with 'set_max_tokens'."
    active_endpoint = ctx.request_context.lifespan_context["active_external_endpoint"]
    use_local = active_endpoint is None and endpoint is None
    use_configured = active_endpoint and (endpoint is None or endpoint == active_endpoint)
    use_extracted = endpoint and endpoint != active_endpoint
    logger.debug(f"Execution context - Local: {use_local}, Configured: {use_configured}, Extracted: {use_extracted}")
    try:
        # Call the tool functions directly; they are plain module-level callables.
        if use_extracted:
            results = execute_on_endpoint(endpoint, query, ctx)
            logger.debug(f"Executed on extracted endpoint {endpoint}")
        elif use_local:
            results = sparql_query(query, ctx, use_service=False)
            logger.debug("Executed on local graph")
        elif use_configured:
            results = sparql_query(query, ctx)
            logger.debug(f"Executed on configured endpoint {active_endpoint}")
        else:
            logger.debug("No valid execution context")
            return "Unable to determine execution context for the query."
        # Token accounting: measure the response and report usage alongside the results.
        output_tokens = len(encoder.encode(results))
        total_tokens = input_tokens + output_tokens
        exec_time = time.time() - start_time
        usage_stats = f"[Resource Usage: Input Tokens: {input_tokens}, Output Tokens: {output_tokens}, Total: {total_tokens}, Time: {exec_time:.2f}s]"
        logger.debug(f"Usage stats generated: {usage_stats}")
        return f"{results}\n\n{usage_stats}"
    except Exception as e:
        logger.error(f"Query execution error: {str(e)}")
        if "interrupted" in str(e).lower():
            return f"Error: Response interrupted, likely due to token limit (Input: {input_tokens} tokens, Max: {max_tokens}). Shorten input or increase MAX_TOKENS."
        return f"Error executing query: {str(e)}"

@mcp.prompt()
def tutorial(ctx: Context) -> list[base.Message]:
    """Provide an interactive tutorial for RDF/SPARQL usage.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        list[base.Message]: A list of tutorial messages tailored to the current mode.
    """
    # Tailor the examples to the configured mode.
    sparql_mode = bool(ctx.request_context.lifespan_context["sparql_endpoint"])
    source = "DBpedia" if sparql_mode else "local triples"
    example_query = "SELECT ?s WHERE { ?s a dbo:Person } LIMIT 10" if sparql_mode else "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10"
    example_viz = "http://dbpedia.org/resource/Albert_Einstein" if sparql_mode else "http://example.org/person1"
    return [
        base.UserMessage("Start the RDF/SPARQL tutorial"),
        base.AssistantMessage(f"Step 1: This uses {source}. Try 'explore://classes' to see types."),
        base.AssistantMessage(f"Step 2: Query with SPARQL. Try 'sparql_query' with '{example_query}'."),
        base.AssistantMessage(f"Step 3: Visualize with 'graph_visualization({example_viz})'. Ready for more?")
    ]

# Run the server
if __name__ == "__main__":
    logger.info("Starting mcp.run()")
    try:
        mcp.run()
    except Exception as e:
        logger.error(f"Failed to start RDF Explorer: {str(e)}")
        sys.exit(1)
    logger.info("mcp.run() completed")

--------------------------------------------------------------------------------