├── src
│   └── mcp-rdf-explorer
│       ├── __init__.py
│       └── server.py
├── requirements.txt
├── LICENSE
├── test_rdf_explorer.py
└── README.md

--------------------------------------------------------------------------------
/src/mcp-rdf-explorer/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
mcp
rdflib[sparql]
requests
feedparser
tiktoken
pytest
pytest-asyncio

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2025 Emeka Azuka Okoye

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/test_rdf_explorer.py:
--------------------------------------------------------------------------------
# Run with: pytest test_rdf_explorer.py -v

import pytest
import rdflib
from unittest.mock import AsyncMock, patch

from server import triplestore_lifespan, get_mode, FastMCP

# Fixtures
@pytest.fixture
def mock_mcp():
    mcp = FastMCP("Test")
    mcp._lifespan_context = {
        "active_external_endpoint": None,
        "triple_file": "test.ttl",
        "sparql_endpoint": ""
    }
    return mcp

@pytest.fixture
def mock_context(mock_mcp):
    class MockRequestContext:
        lifespan_context = mock_mcp._lifespan_context
    class MockContext:
        request_context = MockRequestContext()
    return MockContext()

# Mode-reporting tests
def test_get_mode_local(mock_context):
    result = get_mode(mock_context)
    assert result == "Local File Mode with Dataset: 'test.ttl'"

def test_get_mode_sparql(mock_context):
    mock_context.request_context.lifespan_context["active_external_endpoint"] = "https://dbpedia.org/sparql"
    mock_context.request_context.lifespan_context["sparql_endpoint"] = "https://dbpedia.org/sparql"
    result = get_mode(mock_context)
    assert result == "SPARQL Endpoint Mode with Endpoint: 'https://dbpedia.org/sparql'"

# Lifespan tests
@pytest.mark.asyncio
async def test_triplestore_lifespan_local():
    mock_server = AsyncMock()
    with patch("rdflib.Graph.parse") as mock_parse:
        mock_parse.return_value = rdflib.Graph()
        async with triplestore_lifespan(mock_server, "test.ttl", "") as context:
            assert context["triple_file"] == "test.ttl"
            assert context["active_external_endpoint"] is None
            assert isinstance(context["graph"], rdflib.Graph)

@pytest.mark.asyncio
async def test_triplestore_lifespan_sparql():
    mock_server = AsyncMock()
    with patch("rdflib.plugins.stores.sparqlstore.SPARQLStore.query") as mock_query:
        mock_query.return_value = []
        async with triplestore_lifespan(mock_server, "test.ttl", "https://dbpedia.org/sparql") as context:
            assert context["sparql_endpoint"] == "https://dbpedia.org/sparql"
            assert context["active_external_endpoint"] == "https://dbpedia.org/sparql"
            assert "graph" in context

# Failure-path tests
@pytest.mark.asyncio
async def test_triplestore_lifespan_sparql_failure():
    mock_server = AsyncMock()
    with patch("rdflib.plugins.stores.sparqlstore.SPARQLStore.query") as mock_query:
        mock_query.side_effect = Exception("Connection failed")
        with pytest.raises(Exception, match="Connection failed"):
            async with triplestore_lifespan(mock_server, "test.ttl", "http://invalid.endpoint"):
                pass

@pytest.mark.asyncio
async def test_triplestore_lifespan_local_file_missing():
    mock_server = AsyncMock()
    with patch("rdflib.Graph.parse") as mock_parse:
        mock_parse.side_effect = FileNotFoundError("File not found")
        with pytest.raises(FileNotFoundError, match="File not found"):
            async with triplestore_lifespan(mock_server, "missing.ttl", ""):
                pass

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# RDF Explorer v1.0.0

## Overview
A Model Context Protocol (MCP) server that provides a conversational interface for exploring and analyzing RDF (Turtle) based knowledge graphs, in either Local File mode or SPARQL Endpoint mode. The server mediates between AI applications (hosts/clients) and RDF data, enabling graph exploration and analysis through SPARQL queries. It is a practical tool for knowledge graph research and AI data preparation.


## Components

### Tools
The server implements the following SPARQL query and search tools:

- `execute_on_endpoint`
  - Execute a SPARQL query directly on an external endpoint
  - Input:
    - `endpoint` (str): The SPARQL endpoint URL to query.
    - `query` (str): The SPARQL query to execute.
    - `ctx` (Context): The FastMCP context object.
  - Returns: Query results as a newline-separated string, or an error message.

- `sparql_query`
  - Execute a SPARQL query on the current graph or active external endpoint
  - Input:
    - `query` (str): The SPARQL query to execute.
    - `ctx` (Context): The FastMCP context object.
    - `use_service` (bool): Whether to use a SERVICE clause for federated queries in local mode (default: True).
  - Returns: Query results as a newline-separated string, or an error message.

- `graph_stats`
  - Calculate and return statistics about the graph in JSON format
  - Input:
    - `ctx` (Context): The FastMCP context object.
  - Returns: JSON string containing graph statistics (e.g., triple count, unique subjects).

- `count_triples`
  - Count triples in the graph. Disabled in SPARQL Endpoint Mode; use a custom query instead.
  - Input:
    - `ctx` (Context): The FastMCP context object.
  - Returns: Number of triples as a string, or an error message.

- `full_text_search`
  - Perform a full-text search on the graph or endpoint, avoiding proprietary syntax.
  - Input:
    - `search_term` (str): The term to search for.
    - `ctx` (Context): The FastMCP context object.
  - Returns: Search results as a newline-separated string, or an error message.

- `health_check`
  - Check the health of the triplestore connection.
  - Input:
    - `ctx` (Context): The FastMCP context object.
  - Returns: 'Healthy' if the connection is good, 'Unhealthy: <error>' otherwise.

- `get_mode`
  - Get the current mode of RDF Explorer. Useful for verifying which data source is active.
  - Input:
    - `ctx` (Context): The FastMCP context object.
  - Returns: A message indicating the mode and dataset or endpoint.


### Resources

The server exposes the following resources:
- `schema://all`: Retrieve schema information (classes and properties) from the graph.
  - Returns: A newline-separated string of schema elements (classes and properties).

- `queries://{template_name}`: Retrieve a predefined SPARQL query template by name.
  - Returns: The SPARQL query string, or 'Template not found'.

- `explore://{query_name}`: Execute an exploratory SPARQL query by name and return results in JSON.
  - `query_name` (str): The name of the exploratory query (e.g., 'classes', 'relationships/URI').
  - Returns: JSON string of query results.

- `explore://report`: Generate a Markdown report of exploratory queries.
  - Returns: A Markdown-formatted report string.
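
For reference, here is a minimal standalone sketch (not part of the server) of the kind of query the `explore://classes` resource runs; it assumes only `rdflib` and a hypothetical local file `sample.ttl`:

```python
# Standalone sketch: the same 'classes' exploratory query the server ships,
# run directly with rdflib. 'sample.ttl' is a hypothetical Turtle file.
import rdflib

graph = rdflib.Graph()
graph.parse("sample.ttl", format="turtle")

query = """
SELECT DISTINCT ?type ?label
WHERE { ?s a ?type . OPTIONAL { ?type rdfs:label ?label } }
LIMIT 100
"""

for row in graph.query(query):
    print(row.type, row.label)
```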


### Prompts

The server exposes the following prompts:
- `analyze_graph_structure`: Initiate an analysis of the graph structure with schema data.
  - Returns: A list of messages to guide graph structure analysis.

- `find_relationships`: Generate a SPARQL query to find relationships for a given subject.
  - Returns: A SPARQL query string to find relationships.

- `text_to_sparql`: Convert a text prompt to a SPARQL query and execute it, with token limit checks.
  - `prompt` (str): The text prompt to convert to SPARQL.
  - Returns: Query results with usage stats, or an error message.


## Setup and Configuration

### Installing on Claude Desktop
Before starting, make sure [Claude Desktop](https://claude.ai/download) is installed.

1. Go to: Settings > Developer > Edit Config

2. Add the following to your `claude_desktop_config.json`:
   On macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
   On Windows: `%APPDATA%/Claude/claude_desktop_config.json`

To use a local RDF Turtle file, pass the `--triple-file` argument:
```json
{
  "mcpServers": {
    "rdf_explorer": {
      "command": "C:\\path\\to\\venv\\Scripts\\python.exe",
      "args": ["C:\\path\\to\\server.py", "--triple-file", "your_file.ttl"]
    }
  }
}
```

To use a SPARQL endpoint, pass the `--sparql-endpoint` argument:
```json
{
  "mcpServers": {
    "rdf_explorer": {
      "command": "C:\\path\\to\\venv\\Scripts\\python.exe",
      "args": ["C:\\path\\to\\server.py", "--sparql-endpoint", "https://example.com/sparql"]
    }
  }
}
```

3. Restart Claude Desktop and start querying and exploring graph data.

4. Try the prompt: "what mode is RDF Explorer running in?"


## Usage Examples

Here are examples of how you can explore RDF data using natural language:

### Querying Data in Local File Mode

You can ask questions like:
- "Show me all employees in the Sales department"
- "Find the top 5 oldest customers"
- "Who has purchased more than 3 products in the last month?"
- "List all entities"
- "Using the DBpedia endpoint, list 10 songs by Michael Jackson"
- "Using the Wikidata endpoint, list 5 cities"
- "count the triples"
- "analyze the graph structure"
- "Select ..."
- "search '{text}'"
- "find relationships of '{URI}'"
- "what mode is RDF Explorer running in?"

### Querying Data in SPARQL Endpoint Mode

You can ask questions like:
- "Using the DBpedia endpoint, list 10 songs by Michael Jackson"
- "Using the Wikidata endpoint, list 5 cities"
- "Select ..."
- "search '{text}'"
- "find relationships of '{URI}'"
- "what mode is RDF Explorer running in?"
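
### Querying Programmatically

Beyond Claude Desktop, any MCP client can drive the server. Below is a minimal client sketch using the official `mcp` Python SDK over stdio; the interpreter, server path, and Turtle file are placeholders to adapt to your environment:

```python
# Hypothetical client-side sketch; paths and file names are placeholders.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main():
    params = StdioServerParameters(
        command="python",
        args=["server.py", "--triple-file", "your_file.ttl"],
    )
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            # Ask the server which mode it is running in.
            mode = await session.call_tool("get_mode", {})
            print(mode.content)
            # Run a raw SPARQL query against the local graph.
            result = await session.call_tool(
                "sparql_query",
                {"query": "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10"},
            )
            print(result.content)


asyncio.run(main())
```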

## Development
```
# clone the repository
git clone https://github.com/emekaokoye/mcp-rdf-explorer.git
cd mcp-rdf-explorer

# set up a virtual environment
python -m venv venv
source venv/bin/activate
# on Windows: venv\Scripts\activate

# install development dependencies
pip install -r requirements.txt

# run the tests
pytest test_rdf_explorer.py -v
```

## License

This MCP server is licensed under the MIT License. This means you are free to use, modify, and distribute the software, subject to the terms and conditions of the MIT License. For more details, please see the [license](LICENSE) file in the project repository.

--------------------------------------------------------------------------------
/src/mcp-rdf-explorer/server.py:
--------------------------------------------------------------------------------
import argparse
import asyncio
import json
import logging
import os
import sys
import time
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from typing import Any, Dict

import feedparser
import rdflib
import requests
import tiktoken
from mcp.server.fastmcp import FastMCP, Context
from mcp.server.fastmcp.prompts import base

# Configure logging at the start
logger = logging.getLogger(__name__)

if not logger.handlers:
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=[logging.StreamHandler(sys.stderr)]
    )

# Check for SPARQLStore availability
try:
    from rdflib.plugins.stores.sparqlstore import SPARQLStore
    HAS_SPARQLSTORE = True
except ImportError:
    HAS_SPARQLSTORE = False
    logger.warning("SPARQLStore not available. SPARQL Endpoint Mode and external queries will be disabled.")

# Parse command-line arguments. parse_known_args() tolerates foreign argv
# entries (for example pytest's options when this module is imported in tests).
parser = argparse.ArgumentParser(description="RDF Explorer MCP Server v1.0.0")
parser.add_argument("--triple-file", default="", help="Path to the local RDF triple file")
parser.add_argument("--sparql-endpoint", default="", help="SPARQL endpoint URL (empty for Local File Mode)")
args, _ = parser.parse_known_args()

logger.info("Starting RDF Explorer MCP Server v1.0.0")
logger.info("Setting lifespan")

# Define the MCP instance as a global so resources and prompts can reach it.
# The lambda defers the call, so triplestore_lifespan may be defined below.
mcp = FastMCP(
    "RDF Explorer",
    dependencies=["rdflib[sparql]", "requests", "feedparser", "tiktoken"],
    lifespan=lambda server: triplestore_lifespan(server, args.triple_file, args.sparql_endpoint)
)

@asynccontextmanager
async def triplestore_lifespan(server: FastMCP, triple_file: str, sparql_endpoint: str) -> AsyncIterator[Dict[str, Any]]:
    """Manage the lifespan of the triplestore, initializing and shutting down the graph connection.

    Args:
        server (FastMCP): The FastMCP server instance.
        triple_file (str): Path to the local RDF triple file.
        sparql_endpoint (str): URL of the SPARQL endpoint, if any.

    Yields:
        Dict[str, Any]: Context dictionary containing the graph, metrics, and other state.

    Raises:
        FileNotFoundError: If the triple file is not found.
        Exception: If connecting to the SPARQL endpoint or parsing the file fails.
68 | """ 69 | logger.info(f"Initializing triplestore with triple_file={triple_file}, sparql_endpoint={sparql_endpoint}") 70 | 71 | metrics = {"queries": 0, "total_time": 0.0} 72 | external_stores = {} 73 | feed_graph = rdflib.Graph() 74 | active_external_endpoint = None 75 | max_tokens = 10000 76 | 77 | if sparql_endpoint and HAS_SPARQLSTORE: 78 | logger.info(f"Connecting to SPARQL endpoint: {sparql_endpoint}") 79 | try: 80 | graph = SPARQLStore(query_endpoint=sparql_endpoint) 81 | graph.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 1") 82 | external_stores[sparql_endpoint] = graph 83 | active_external_endpoint = sparql_endpoint 84 | logger.info(f"Successfully connected to {sparql_endpoint}") 85 | except Exception as e: 86 | logger.error(f"Failed to connect to SPARQL endpoint: {str(e)}") 87 | raise 88 | else: 89 | graph = rdflib.Graph() 90 | file_path = os.path.join(os.path.dirname(__file__), triple_file) 91 | logger.info(f"Loading local RDF file: {file_path}") 92 | try: 93 | graph.parse(file_path, format="turtle") 94 | logger.info(f"Loaded {len(graph)} triples from local file") 95 | except FileNotFoundError: 96 | logger.error(f"RDF file not found: {file_path}") 97 | raise 98 | except Exception as e: 99 | logger.error(f"Failed to load RDF file: {str(e)}") 100 | raise 101 | 102 | try: 103 | logger.info("Triplestore initialized successfully") 104 | yield { 105 | "graph": graph, 106 | "metrics": metrics, 107 | "external_stores": external_stores, 108 | "feed_graph": feed_graph, 109 | "active_external_endpoint": active_external_endpoint, 110 | "max_tokens": max_tokens, 111 | "triple_file": triple_file, 112 | "sparql_endpoint": sparql_endpoint 113 | } 114 | finally: 115 | logger.info("Shutting down triplestore connection") 116 | if sparql_endpoint and HAS_SPARQLSTORE and sparql_endpoint in external_stores: 117 | external_stores[sparql_endpoint].close() 118 | 119 | # Resources 120 | @mcp.resource("graph://{graph_id}") 121 | def get_graph(graph_id: str) -> str: 122 | """Retrieve a graph by ID and serialize it in Turtle format. 123 | 124 | Args: 125 | graph_id (str): Identifier for the graph (currently unused, returns main graph). 126 | 127 | Returns: 128 | str: The serialized graph in Turtle format. 129 | 130 | Raises: 131 | Exception: If serialization fails. 132 | """ 133 | logger.debug(f"Fetching graph for graph_id: {graph_id}") 134 | graph = mcp._lifespan_context["graph"] 135 | try: 136 | if HAS_SPARQLSTORE and isinstance(graph, rdflib.SPARQLStore): 137 | results = graph.query("SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 100") 138 | temp_graph = rdflib.Graph() 139 | for s, p, o in results: 140 | temp_graph.add((s, p, o)) 141 | return temp_graph.serialize(format="turtle") 142 | return graph.serialize(format="turtle") 143 | except Exception as e: 144 | logger.error(f"Error serializing graph {graph_id}: {str(e)}") 145 | raise 146 | 147 | @mcp.resource("feed://all") 148 | def get_feed_graph() -> str: 149 | """Retrieve the feed graph stored by explore_url in Turtle format. 150 | 151 | Returns: 152 | str: The serialized feed graph in Turtle format. 153 | 154 | Raises: 155 | Exception: If serialization of the feed graph fails. 
156 | """ 157 | logger.debug("Fetching feed graph") 158 | feed_graph = mcp._lifespan_context["feed_graph"] 159 | try: 160 | return feed_graph.serialize(format="turtle") 161 | except Exception as e: 162 | logger.error(f"Error serializing feed graph: {str(e)}") 163 | raise 164 | 165 | 166 | @mcp.resource("schema://all") 167 | def get_schema() -> str: 168 | """Retrieve schema information (classes and properties) from the graph. 169 | Returns: 170 | str: A newline-separated string of schema elements (classes and properties). 171 | Raises: 172 | Exception: If the schema query fails. 173 | """ 174 | graph = mcp._lifespan_context["graph"] 175 | schema_query = """ 176 | PREFIX rdfs: 177 | PREFIX owl: 178 | PREFIX rdf: 179 | SELECT DISTINCT ?entity ?type 180 | WHERE { 181 | { ?entity a rdfs:Class . BIND("rdfs:Class" as ?type) } UNION 182 | { ?entity a owl:Class . BIND("owl:Class" as ?type) } UNION 183 | { ?entity a rdf:Property . BIND("rdf:Property" as ?type) } UNION 184 | { ?entity a owl:ObjectProperty . BIND("owl:ObjectProperty" as ?type) } UNION 185 | { ?entity a owl:DatatypeProperty . BIND("owl:DatatypeProperty" as ?type) } UNION 186 | { ?entity a owl:AnnotationProperty . BIND("owl:AnnotationProperty" as ?type) } 187 | } 188 | ORDER BY ?type ?entity 189 | LIMIT 100 190 | """ 191 | try: 192 | results = graph.query(schema_query) 193 | return "\n".join(f"{row['type']}: {row['entity']}" for row in results) 194 | except Exception as e: 195 | logger.error(f"Schema query error: {str(e)}") 196 | raise 197 | 198 | 199 | @mcp.resource("queries://{template_name}") 200 | def get_query_template(template_name: str) -> str: 201 | """Retrieve a predefined SPARQL query template by name. 202 | 203 | Args: 204 | template_name (str): The name of the query template (e.g., 'orphans', 'cycles'). 205 | 206 | Returns: 207 | str: The SPARQL query string or 'Template not found' if the name is invalid. 208 | """ 209 | templates = { 210 | "orphans": "SELECT ?s WHERE { ?s ?p ?o . FILTER NOT EXISTS { ?x ?y ?s } } LIMIT 100", 211 | "cycles": "SELECT ?s ?o WHERE { ?s ?p ?o . ?o ?q ?s } LIMIT 100" 212 | } 213 | return templates.get(template_name, "Template not found") 214 | 215 | @mcp.resource("explore://{query_name}") 216 | def exploratory_query(query_name: str) -> str: 217 | """Execute an exploratory SPARQL query by name and return results in JSON. 218 | 219 | Args: 220 | query_name (str): The name of the exploratory query (e.g., 'classes', 'relationships/URI'). 221 | 222 | Returns: 223 | str: JSON string of query results. 224 | 225 | Raises: 226 | Exception: If the query execution fails. 227 | """ 228 | graph = mcp._lifespan_context["graph"] 229 | queries = { 230 | "classes": "SELECT DISTINCT ?type ?label WHERE { ?s a ?type . OPTIONAL { ?type rdfs:label ?label } } LIMIT 100", 231 | "properties": "SELECT DISTINCT ?objprop ?label WHERE { ?objprop a owl:ObjectProperty . OPTIONAL { ?objprop rdfs:label ?label } } LIMIT 100", 232 | "data_properties": "SELECT DISTINCT ?dataprop ?label WHERE { ?dataprop a owl:DatatypeProperty . OPTIONAL { ?dataprop rdfs:label ?label } } LIMIT 100", 233 | "used_properties": "SELECT DISTINCT ?p ?label WHERE { ?s ?p ?o . OPTIONAL { ?p rdfs:label ?label } } LIMIT 100", 234 | "entities": "SELECT DISTINCT ?entity ?elabel ?type ?tlabel WHERE { ?entity a ?type . OPTIONAL { ?entity rdfs:label ?elabel } . 
        "top_predicates": "SELECT ?pred (COUNT(*) as ?triples) WHERE { ?s ?pred ?o } GROUP BY ?pred ORDER BY DESC(?triples) LIMIT 100",
        "class_counts": "SELECT ?class (COUNT(?s) AS ?count) WHERE { ?s a ?class } GROUP BY ?class ORDER BY ?count LIMIT 100",
        "property_counts": "SELECT ?p (COUNT(?s) AS ?count) WHERE { ?s ?p ?o } GROUP BY ?p ORDER BY ?count LIMIT 100"
    }
    if query_name.startswith("relationships/"):
        subject = query_name.split("/", 1)[1]
        query = f"SELECT ?predicate ?object WHERE {{ <{subject}> ?predicate ?object }} LIMIT 100"
    else:
        query = queries.get(query_name, "Query not found")
    try:
        results = graph.query(query)
        # dict(row) is not valid for a ResultRow; asdict() maps variable names to values.
        return json.dumps([{k: str(v) for k, v in row.asdict().items()} for row in results])
    except Exception as e:
        logger.error(f"Exploratory query error: {str(e)}")
        raise

@mcp.resource("explore://report")
def exploratory_report() -> str:
    """Generate a Markdown report of exploratory queries.

    Returns:
        str: A Markdown-formatted report string.

    Raises:
        Exception: If any query in the report generation fails (error included in report).
    """
    report = ["# RDF Exploration Report"]
    for name in ["classes", "used_properties", "top_predicates"]:
        try:
            # Reuse the exploratory queries above; each returns a JSON list of rows.
            rows = json.loads(exploratory_query(name))
            headers = list(rows[0].keys()) if rows else []
            report.append(f"## {name.replace('_', ' ').title()}")
            report.append("| " + " | ".join(headers) + " |")
            report.append("| " + " | ".join(["---"] * len(headers)) + " |")
            for row in rows:
                report.append("| " + " | ".join(str(row.get(h, "")) for h in headers) + " |")
        except Exception as e:
            report.append(f"## {name.replace('_', ' ').title()}\nError: {str(e)}")
    return "\n".join(report)

@mcp.resource("metrics://status")
def get_metrics() -> str:
    """Retrieve server metrics in JSON format.

    Returns:
        str: JSON string containing query count and total execution time.
    """
    metrics = mcp._lifespan_context["metrics"]
    return json.dumps(metrics)

# Tools
@mcp.tool()
def set_max_tokens(tokens: int, ctx: Context) -> str:
    """Set the maximum token limit for prompts.

    Args:
        tokens (int): The new maximum token limit (must be positive).
        ctx (Context): The FastMCP context object.

    Returns:
        str: Confirmation message or error if the value is invalid.
    """
    if tokens <= 0:
        return "Error: MAX_TOKENS must be positive."
    ctx.request_context.lifespan_context["max_tokens"] = tokens
    logger.info(f"Set MAX_TOKENS to {tokens}")
    return f"MAX_TOKENS set to {tokens}"

@mcp.tool()
def execute_on_endpoint(endpoint: str, query: str, ctx: Context) -> str:
    """Execute a SPARQL query directly on an external endpoint.

    Args:
        endpoint (str): The SPARQL endpoint URL to query.
        query (str): The SPARQL query to execute.
        ctx (Context): The FastMCP context object.

    Returns:
        str: Query results as a newline-separated string, or an error message if SPARQLStore is unavailable or the query fails.
    """
    if not HAS_SPARQLSTORE:
        return "SPARQLStore not available. Cannot query external endpoints."
    try:
        # A fresh SPARQLStore is created per call, independent of the lifespan-managed stores.
        store = SPARQLStore(query_endpoint=endpoint)
        results = store.query(query)
        logger.debug(f"Executed query on endpoint {endpoint}: {query}")
        return "\n".join(str(row) for row in results)
    except Exception as e:
        logger.error(f"Direct endpoint query error: {str(e)}")
        return f"Query error: {str(e)}"

@mcp.tool()
def connect_external_triplestore(endpoint: str, ctx: Context) -> str:
    """Connect to an external SPARQL endpoint and optionally set it as active for local mode queries.

    Args:
        endpoint (str): The SPARQL endpoint URL to connect to.
        ctx (Context): The FastMCP context object.

    Returns:
        str: Connection status message.

    Raises:
        Exception: If connecting to the endpoint fails.
    """
    if not HAS_SPARQLSTORE:
        return "SPARQLStore not available. Cannot connect to external endpoints."
    try:
        store = SPARQLStore(query_endpoint=endpoint)
        store.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 1")
        ctx.request_context.lifespan_context["external_stores"][endpoint] = store
        if not ctx.request_context.lifespan_context["active_external_endpoint"]:
            ctx.request_context.lifespan_context["active_external_endpoint"] = endpoint
            logger.info(f"Set active external endpoint to {endpoint} for local mode")
            return f"Connected to {endpoint} and set as active endpoint for local mode queries"
        else:
            logger.info(f"Connected to {endpoint} but not set as active (SPARQL endpoint mode active)")
            return f"Connected to {endpoint} (use SERVICE clause manually in SPARQL endpoint mode)"
    except Exception as e:
        logger.error(f"External triplestore connection error: {str(e)}")
        raise

@mcp.tool()
def sparql_query(query: str, ctx: Context, use_service: bool = True) -> str:
    """Execute a SPARQL query on the current graph or active external endpoint.

    Args:
        query (str): The SPARQL query to execute.
        ctx (Context): The FastMCP context object.
        use_service (bool): Whether to use a SERVICE clause for federated queries in local mode (default: True).

    Returns:
        str: Query results as a newline-separated string, or an error message if the query fails.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    sparql_endpoint = ctx.request_context.lifespan_context["sparql_endpoint"]
    active_external_endpoint = ctx.request_context.lifespan_context["active_external_endpoint"]
    start_time = time.time()
    try:
        if not sparql_endpoint and active_external_endpoint and use_service:
            # Local File Mode with an active external endpoint: wrap the query in a
            # SERVICE clause so rdflib federates it to that endpoint.
            wrapped_query = f"SELECT ?s WHERE {{ SERVICE <{active_external_endpoint}> {{ {query} }} }}"
            logger.debug(f"Executing federated query in local mode: {wrapped_query}")
            results = graph.query(wrapped_query)
        else:
            logger.debug(f"Executing query directly: {query}")
            results = graph.query(query)
        ctx.request_context.lifespan_context["metrics"]["queries"] += 1
        ctx.request_context.lifespan_context["metrics"]["total_time"] += time.time() - start_time
        return "\n".join(str(row) for row in results)
    except Exception as e:
        logger.error(f"SPARQL query error: {str(e)}")
        return f"Query error: {str(e)}"

@mcp.tool()
def explore_url(url: str, ctx: Context) -> str:
    """Extract triples from an RSS/OPML feed URL and store them in the feed graph.

    In local mode, the entries are also merged into the main graph. Only the first
    five feed entries are ingested, each stored as a single (link, title) triple.

    Args:
        url (str): The URL of the feed to explore (e.g., 'http://rss.cnn.com/rss/cnn_topstories.rss').
        ctx (Context): The FastMCP context object.

    Returns:
        str: A message indicating the number of entries added.

    Raises:
        Exception: If fetching or parsing the feed fails.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    feed_graph = ctx.request_context.lifespan_context["feed_graph"]
    try:
        response = requests.get(url)
        feed = feedparser.parse(response.content)
        for entry in feed.entries[:5]:
            feed_graph.add((rdflib.URIRef(entry.link), rdflib.URIRef("http://example.org/title"), rdflib.Literal(entry.title)))
        if not (HAS_SPARQLSTORE and isinstance(graph, SPARQLStore)):
            # Local graphs are writable, so mirror the feed triples into the main graph.
            for triple in feed_graph:
                graph.add(triple)
        return f"Added {len(feed.entries[:5])} entries from {url} to feed_graph"
    except Exception as e:
        logger.error(f"Explore URL error: {str(e)}")
        raise

@mcp.tool()
def graph_stats(ctx: Context) -> str:
    """Calculate and return statistics about the graph in JSON format.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        str: JSON string containing graph statistics (e.g., triple count, unique subjects).

    Raises:
        Exception: If querying or calculating stats fails.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    try:
        if HAS_SPARQLSTORE and isinstance(graph, SPARQLStore):
            # Remote stores: sample via SPARQL rather than iterating the whole graph.
            stats = {
                "unique_subjects": len(set(graph.query("SELECT DISTINCT ?s WHERE { ?s ?p ?o } LIMIT 1000"))),
                "unique_predicates": len(set(graph.query("SELECT DISTINCT ?p WHERE { ?s ?p ?o } LIMIT 1000"))),
                "unique_objects": len(set(graph.query("SELECT DISTINCT ?o WHERE { ?s ?p ?o } LIMIT 1000"))),
                "class_freq": dict(graph.query("SELECT ?class (COUNT(?s) AS ?count) WHERE { ?s a ?class } GROUP BY ?class LIMIT 100"))
            }
        else:
            stats = {
                "triple_count": len(graph),
                "unique_subjects": len(set(s for s, _, _ in graph)),
                "unique_predicates": len(set(p for _, p, _ in graph)),
                "unique_objects": len(set(o for _, _, o in graph)),
                "class_freq": dict(graph.query("SELECT ?class (COUNT(?s) AS ?count) WHERE { ?s a ?class } GROUP BY ?class"))
            }
        return json.dumps(stats)
    except Exception as e:
        logger.error(f"Graph stats error: {str(e)}")
        raise

@mcp.tool()
def count_triples(ctx: Context) -> str:
    """Count triples in the graph. Disabled in SPARQL Endpoint Mode; use a custom SPARQL query instead.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        str: Number of triples as a string, or an error message if counting fails or in SPARQL mode.
    """
    # len() is only meaningful for local graphs, so bail out in SPARQL Endpoint Mode.
    if ctx.request_context.lifespan_context["sparql_endpoint"]:
        return "Error: count_triples is not supported in SPARQL Endpoint Mode. Write a custom SPARQL query to count triples."
    graph = ctx.request_context.lifespan_context["graph"]
    try:
        return str(len(graph))
    except Exception as e:
        return f"Error counting triples: {str(e)}"

@mcp.tool()
def full_text_search(search_term: str, ctx: Context) -> str:
    """Perform a full-text search on the graph or endpoint, avoiding proprietary syntax.

    Args:
        search_term (str): The term to search for.
        ctx (Context): The FastMCP context object.

    Returns:
        str: Search results as a newline-separated string, or an error message if the search fails.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    # Escape double quotes so the term cannot break out of the REGEX string literal.
    safe_term = search_term.replace('"', '\\"')
    query = f"""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT DISTINCT ?s ?label
    WHERE {{
        ?s ?p ?o .
        FILTER(REGEX(STR(?o), "{safe_term}", "i"))
        OPTIONAL {{ ?s rdfs:label ?label }}
    }} LIMIT 100
    """
    try:
        results = graph.query(query)
        return "\n".join(str(row) for row in results)
    except Exception as e:
        logger.error(f"Full-text search error: {str(e)}")
        return f"Error: {str(e)}"

@mcp.tool()
def health_check(ctx: Context) -> str:
    """Check the health of the triplestore connection.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        str: 'Healthy' if the connection is good, 'Unhealthy: <error>' otherwise.
    """
    graph = ctx.request_context.lifespan_context["graph"]
    try:
        graph.query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 1")
        return "Healthy"
    except Exception as e:
        logger.error(f"Health check error: {str(e)}")
        return f"Unhealthy: {str(e)}"

@mcp.tool()
def get_mode(ctx: Context) -> str:
    """Get the current mode of RDF Explorer. Useful for verifying which data source is active.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        str: A message indicating the mode and dataset or endpoint.
    """
    triple_file = ctx.request_context.lifespan_context["triple_file"]
    sparql_endpoint = ctx.request_context.lifespan_context["sparql_endpoint"]
    if sparql_endpoint:
        return f"SPARQL Endpoint Mode with Endpoint: '{sparql_endpoint}'"
    else:
        return f"Local File Mode with Dataset: '{triple_file}'"

# Prompts
@mcp.prompt()
def analyze_graph_structure(ctx: Context) -> list[base.Message]:
    """Initiate an analysis of the graph structure with sample schema data.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        list[base.Message]: A list of messages to guide graph structure analysis.

    Raises:
        Exception: If retrieving the schema fails.
    """
    try:
        schema = get_schema()
        source = "DBpedia" if ctx.request_context.lifespan_context["active_external_endpoint"] else "local triples"
        return [
            base.UserMessage(f"Please analyze the structure of the {source} graph."),
            base.UserMessage(f"Here's a sample schema:\n{schema}"),
            base.AssistantMessage("What specific aspects would you like me to focus on?")
        ]
    except Exception as e:
        logger.error(f"Analyze graph structure error: {str(e)}")
        raise

@mcp.prompt()
def find_relationships(subject: str) -> str:
    """Generate a SPARQL query to find relationships for a given subject.

    Args:
        subject (str): The URI of the subject to query relationships for.

    Returns:
        str: A SPARQL query string to find relationships.
    """
    return f"""
    Using the SPARQL query tool, find all relationships for the subject <{subject}>:
    SELECT ?predicate ?object WHERE {{ <{subject}> ?predicate ?object }} LIMIT 100
    """

@mcp.prompt()
def graph_visualization(subject: str) -> list[base.Message]:
    """Generate a DOT visualization of the graph around a subject, capped at 50 outgoing triples.

    Args:
        subject (str): The URI of the subject to visualize.

    Returns:
        list[base.Message]: A list of messages containing the DOT graph.

    Raises:
        Exception: If querying the graph for visualization fails.
    """
    graph = mcp._lifespan_context["graph"]
    try:
        dot = ["digraph G {"]
        results = graph.query(f"SELECT ?p ?o WHERE {{ <{subject}> ?p ?o }} LIMIT 50")
        for row in results:
            dot.append(f'"{subject}" -> "{row["o"]}" [label="{row["p"]}"];')
        dot.append("}")
        return [
            base.UserMessage(f"Visualize the graph around <{subject}>"),
            base.AssistantMessage("\n".join(dot) + "\n\nUse Graphviz (dot -Tpng) to render this DOT format.")
        ]
    except Exception as e:
        logger.error(f"Graph visualization error: {str(e)}")
        raise

@mcp.prompt()
def text_to_sparql(prompt: str, ctx: Context) -> str:
    """Convert a text prompt to a SPARQL query and execute it, with token limit checks.

    Args:
        prompt (str): The text prompt to convert to SPARQL.
        ctx (Context): The FastMCP context object.

    Returns:
        str: Query results with usage stats, or an error message if execution fails or token limits are exceeded.
    """
    encoder = tiktoken.get_encoding("gpt2")
    start_time = time.time()
    # Placeholder translation: a real implementation would derive the endpoint and
    # query from the prompt (e.g., via an LLM call).
    grok_response = {"endpoint": None, "query": "SELECT ?s WHERE { ?s ?p ?o } LIMIT 1"}
    endpoint = grok_response.get("endpoint")
    query = grok_response["query"]
    logger.debug(f"Prompt received: {prompt}")
    input_tokens = len(encoder.encode(prompt + query))
    max_tokens = ctx.request_context.lifespan_context["max_tokens"]
    if input_tokens > max_tokens:
        logger.debug(f"Token limit exceeded: {input_tokens} > {max_tokens}")
        return f"Error: Input exceeds token limit ({input_tokens} tokens > {max_tokens}). Shorten your prompt or increase MAX_TOKENS with 'set_max_tokens'."
    active_endpoint = ctx.request_context.lifespan_context["active_external_endpoint"]
    use_local = active_endpoint is None and endpoint is None
    use_configured = active_endpoint and (endpoint is None or endpoint == active_endpoint)
    use_extracted = endpoint and endpoint != active_endpoint
    logger.debug(f"Execution context - Local: {use_local}, Configured: {use_configured}, Extracted: {use_extracted}")
    try:
        # Call the tool functions directly; they are plain module-level callables.
        if use_extracted:
            results = execute_on_endpoint(endpoint, query, ctx)
            logger.debug(f"Executed on extracted endpoint {endpoint}")
        elif use_local:
            results = sparql_query(query, ctx, use_service=False)
            logger.debug("Executed on local graph")
        elif use_configured:
            results = sparql_query(query, ctx)
            logger.debug(f"Executed on configured endpoint {active_endpoint}")
        else:
            logger.debug("No valid execution context")
            return "Unable to determine execution context for the query."
        # Token accounting: measure the response and report usage alongside the results.
        output_tokens = len(encoder.encode(results))
        total_tokens = input_tokens + output_tokens
        exec_time = time.time() - start_time
        usage_stats = f"[Resource Usage: Input Tokens: {input_tokens}, Output Tokens: {output_tokens}, Total: {total_tokens}, Time: {exec_time:.2f}s]"
        logger.debug(f"Usage stats generated: {usage_stats}")
        return f"{results}\n\n{usage_stats}"
    except Exception as e:
        logger.error(f"Query execution error: {str(e)}")
        if "interrupted" in str(e).lower():
            return f"Error: Response interrupted, likely due to token limit (Input: {input_tokens} tokens, Max: {max_tokens}). Shorten input or increase MAX_TOKENS."
        return f"Error executing query: {str(e)}"

@mcp.prompt()
def tutorial(ctx: Context) -> list[base.Message]:
    """Provide an interactive tutorial for RDF/SPARQL usage.

    Args:
        ctx (Context): The FastMCP context object.

    Returns:
        list[base.Message]: A list of tutorial messages tailored to the current mode.
    """
    # Tailor the examples to the configured mode.
    sparql_mode = bool(ctx.request_context.lifespan_context["sparql_endpoint"])
    source = "DBpedia" if sparql_mode else "local triples"
    example_query = "SELECT ?s WHERE { ?s a dbo:Person } LIMIT 10" if sparql_mode else "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 10"
    example_viz = "http://dbpedia.org/resource/Albert_Einstein" if sparql_mode else "http://example.org/person1"
    return [
        base.UserMessage("Start the RDF/SPARQL tutorial"),
        base.AssistantMessage(f"Step 1: This uses {source}. Try 'explore://classes' to see types."),
        base.AssistantMessage(f"Step 2: Query with SPARQL. Try 'sparql_query' with '{example_query}'."),
        base.AssistantMessage(f"Step 3: Visualize with 'graph_visualization({example_viz})'. Ready for more?")
    ]

# Run the server
if __name__ == "__main__":
    logger.info("Starting mcp.run()")
    try:
        mcp.run()
    except Exception as e:
        logger.error(f"Failed to start RDF Explorer: {str(e)}")
        sys.exit(1)
    logger.info("mcp.run() completed")

--------------------------------------------------------------------------------