├── .env.example ├── .gitignore ├── .python-version ├── Dockerfile ├── LICENSE ├── README.md ├── docker-compose.yml ├── example-clients ├── __init__.py ├── claude_cli.py └── gemini-agent-cli.py ├── get_schema.py ├── pyproject.toml ├── server ├── __init__.py ├── app.py ├── config.py ├── database.py ├── logging_config.py ├── prompts │ ├── __init__.py │ ├── data_visualization.py │ ├── natural_language.py │ └── templates │ │ ├── __init__.py │ │ ├── generate_sql.md.jinja2 │ │ ├── generate_vega.md.jinja2 │ │ ├── justify_sql.md.jinja2 │ │ └── validate_nl.md.jinja2 ├── resources │ ├── __init__.py │ ├── data.py │ ├── extensions.py │ ├── extensions │ │ ├── pgvector.yaml │ │ └── postgis.yaml │ ├── schema.py │ └── sql │ │ ├── __init__.py │ │ ├── get_database.sql │ │ ├── get_schema.sql │ │ ├── get_schema_table.sql │ │ ├── get_schema_view.sql │ │ └── list_schemas.sql └── tools │ ├── __init__.py │ ├── connection.py │ ├── query.py │ └── viz.py ├── test.py └── uv.lock /.env.example: -------------------------------------------------------------------------------- 1 | PG_MCP_URL=http://localhost:8000/sse 2 | DATABASE_URL=postgresql://user:password@hostname:port/databasename 3 | ANTHROPIC_API_KEY=your-anthropic-api-key 4 | GEMINI_API_KEY=your-gemini-api-key -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.DS_Store 2 | .env 3 | .venv/ 4 | __pycache__/ 5 | logs/ -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.13 2 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Dockerfile 2 | FROM python:3.13-slim 3 | 4 | # The installer requires curl (and certificates) to download the release archive 5 | RUN apt-get update && apt-get install -y --no-install-recommends curl ca-certificates 6 | 7 | # Download the latest uv installer 8 | ADD https://astral.sh/uv/install.sh /uv-installer.sh 9 | 10 | # Run the installer then remove it 11 | RUN sh /uv-installer.sh && rm /uv-installer.sh 12 | 13 | # Ensure the installed binary is on the `PATH` 14 | ENV PATH="/root/.local/bin/:$PATH" 15 | 16 | # Copy the project into the image 17 | ADD . /app 18 | 19 | # Sync the project into a new environment, using the frozen lockfile 20 | WORKDIR /app 21 | RUN uv sync --frozen 22 | 23 | # Run the application 24 | CMD ["uv", "run", "-m", "server.app"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Stuart Pennant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PostgreSQL Model Context Protocol (PG-MCP) Server 2 | 3 | A Model Context Protocol (MCP) server for PostgreSQL databases with enhanced capabilities for AI agents. 4 | 5 | More info on the pg-mcp project here: 6 | ### [https://stuzero.github.io/pg-mcp/](https://stuzero.github.io/pg-mcp/) 7 | 8 | ## Overview 9 | 10 | PG-MCP is a server implementation of the [Model Context Protocol](https://modelcontextprotocol.io) for PostgreSQL databases. It provides a comprehensive API for AI agents to discover, connect to, query, and understand PostgreSQL databases through MCP's resource-oriented architecture. 11 | 12 | This implementation builds upon and extends the [reference Postgres MCP implementation](https://github.com/modelcontextprotocol/servers/tree/main/src/postgres) with several key enhancements: 13 | 14 | 1. **Full Server Implementation**: Built as a complete server with SSE transport for production use 15 | 2. **Multi-database Support**: Connect to multiple PostgreSQL databases simultaneously 16 | 3. **Rich Catalog Information**: Extracts and exposes table/column descriptions from the database catalog 17 | 4. **Extension Context**: Provides detailed YAML-based knowledge about PostgreSQL extensions like PostGIS and pgvector 18 | 5. **Query Explanation**: Includes a dedicated tool for analyzing query execution plans 19 | 6. **Robust Connection Management**: Proper lifecycle for database connections with secure connection ID handling 20 | 21 | ## Features 22 | 23 | ### Connection Management 24 | 25 | - **Connect Tool**: Register PostgreSQL connection strings and get a secure connection ID 26 | - **Disconnect Tool**: Explicitly close database connections when done 27 | - **Connection Pooling**: Efficient connection management with pooling 28 | 29 | ### Query Tools 30 | 31 | - **pg_query**: Execute read-only SQL queries using a connection ID 32 | - **pg_explain**: Analyze query execution plans in JSON format 33 | 34 | ### Schema Discovery Resources 35 | 36 | - List schemas with descriptions 37 | - List tables with descriptions and row counts 38 | - Get column details with data types and descriptions 39 | - View table constraints and indexes 40 | - Explore database extensions 41 | 42 | ### Data Access Resources 43 | 44 | - Sample table data (with pagination) 45 | - Get approximate row counts 46 | 47 | ### Extension Context 48 | 49 | Built-in contextual information for PostgreSQL extensions like: 50 | 51 | - **PostGIS**: Spatial data types, functions, and examples 52 | - **pgvector**: Vector similarity search functions and best practices 53 | 54 | Additional extensions can be easily added via YAML config files. 
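For illustration, a new extension context file might look like the sketch below. The keys shown here (`extension`, `description`, `data_types`, `functions`, `examples`) are assumptions made for this example; the bundled `server/resources/extensions/postgis.yaml` and `pgvector.yaml` files define the actual format the server expects, and a new file would presumably be added alongside them in that directory.

```yaml
# Hypothetical example: server/resources/extensions/hstore.yaml
# Field names are illustrative only; mirror postgis.yaml / pgvector.yaml for the real schema.
extension: hstore
description: Key/value pairs stored in a single PostgreSQL column.
data_types:
  - name: hstore
    description: A set of text key/value pairs.
functions:
  - name: "each(hstore)"
    description: Expands an hstore value into a set of (key, value) rows.
  - name: "hstore(text[], text[])"
    description: Builds an hstore from parallel key and value arrays.
examples:
  - description: Find products that define a "color" attribute.
    query: |
      SELECT id, attributes -> 'color' AS color
      FROM products
      WHERE attributes ? 'color';
```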
55 | 56 | ## Installation 57 | 58 | ### Prerequisites 59 | 60 | - Python 3.13+ 61 | - PostgreSQL database(s) 62 | 63 | ### Using Docker 64 | 65 | ```bash 66 | # Clone the repository 67 | git clone https://github.com/stuzero/pg-mcp-server.git 68 | cd pg-mcp-server 69 | 70 | # Build and run with Docker Compose 71 | docker-compose up -d 72 | ``` 73 | 74 | ### Manual Installation 75 | 76 | ```bash 77 | # Clone the repository 78 | git clone https://github.com/stuzero/pg-mcp-server.git 79 | cd pg-mcp-server 80 | 81 | # Install dependencies and create a virtual environment ( .venv ) 82 | uv sync 83 | 84 | # Activate the virtual environment 85 | source .venv/bin/activate # On Windows: .venv\Scripts\activate 86 | 87 | # Run the server 88 | python -m server.app 89 | ``` 90 | 91 | ## Usage 92 | 93 | ### Testing the Server 94 | 95 | The repository includes test scripts to verify server functionality: 96 | 97 | ```bash 98 | # Basic server functionality test 99 | python test.py "postgresql://username:password@hostname:port/database" 100 | 101 | # Claude-powered natural language to SQL conversion 102 | python example-clients/claude_cli.py "Show me the top 5 customers by total sales" 103 | ``` 104 | 105 | The `claude_cli.py` script requires environment variables: 106 | 107 | ``` 108 | # .env file 109 | DATABASE_URL=postgresql://username:password@hostname:port/database 110 | ANTHROPIC_API_KEY=your-anthropic-api-key 111 | PG_MCP_URL=http://localhost:8000/sse 112 | ``` 113 | 114 | 115 | 116 | ### For AI Agents 117 | 118 | Example prompt for use with agents: 119 | 120 | ``` 121 | Use the PostgreSQL MCP server to analyze the database. 122 | Available tools: 123 | - connect: Register a database connection string and get a connection ID 124 | - disconnect: Close a database connection 125 | - pg_query: Execute SQL queries using a connection ID 126 | - pg_explain: Get query execution plans 127 | 128 | You can explore schema resources via: 129 | pgmcp://{conn_id}/schemas 130 | pgmcp://{conn_id}/schemas/{schema}/tables 131 | pgmcp://{conn_id}/schemas/{schema}/tables/{table}/columns 132 | 133 | A comprehensive database description is available at this resource: 134 | pgmcp://{conn_id}/ 135 | ``` 136 | 137 | ## Architecture 138 | 139 | This server is built on: 140 | 141 | - **MCP**: The Model Context Protocol foundation 142 | - **FastMCP**: Python library for MCP 143 | - **asyncpg**: Asynchronous PostgreSQL client 144 | - **YAML**: For extension context information 145 | 146 | ## Security Considerations 147 | 148 | - The server runs in read-only mode by default (enforced via transaction settings) 149 | - Connection details are never exposed in resource URLs, only opaque connection IDs 150 | - Database credentials only need to be sent once during the initial connection 151 | 152 | ## Contributing 153 | 154 | Contributions are welcome! Areas for expansion: 155 | 156 | - Additional PostgreSQL extension context files 157 | - More schema introspection resources 158 | - Query optimization suggestions 159 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | pg-mcp: 5 | build: 6 | context: . 
7 | dockerfile: Dockerfile 8 | container_name: pg-mcp 9 | ports: 10 | - "8000:8000" 11 | environment: 12 | - LOG_LEVEL=DEBUG 13 | - PYTHONUNBUFFERED=1 14 | volumes: 15 | # For development: mount app directory to enable hot-reloading 16 | - ./server:/app/server 17 | restart: unless-stopped 18 | networks: 19 | - pg-mcp-network 20 | 21 | networks: 22 | pg-mcp-network: 23 | driver: bridge 24 | -------------------------------------------------------------------------------- /example-clients/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stuzero/pg-mcp-server/199a81bcefa4020335d33bdfd84d16299aaeaf23/example-clients/__init__.py -------------------------------------------------------------------------------- /example-clients/claude_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # example-clients/claude_cli.py 3 | import asyncio 4 | import dotenv 5 | import os 6 | import sys 7 | import json 8 | import anthropic 9 | from mcp import ClientSession 10 | from mcp.client.sse import sse_client 11 | from tabulate import tabulate 12 | 13 | # Load environment variables 14 | dotenv.load_dotenv() 15 | anthropic_api_key = os.getenv('ANTHROPIC_API_KEY') 16 | db_url = os.getenv('DATABASE_URL') 17 | pg_mcp_url = os.getenv('PG_MCP_URL', 'http://localhost:8000/sse') 18 | 19 | def clean_sql_query(sql_query): 20 | """ 21 | Clean a SQL query by properly handling escaped quotes and trailing backslashes. 22 | 23 | Args: 24 | sql_query (str): The SQL query to clean 25 | 26 | Returns: 27 | str: Cleaned SQL query 28 | """ 29 | # Handle escaped quotes - need to do this character by character to avoid issues with trailing backslashes 30 | import codecs 31 | 32 | # Use unicode_escape to properly handle all escape sequences 33 | result = codecs.decode(sql_query, 'unicode_escape') 34 | 35 | # Remove any extraneous whitespace or newlines 36 | result = result.strip() 37 | 38 | # Add trailing semicolon if missing 39 | if not result.endswith(';'): 40 | result += ';' 41 | 42 | return result 43 | 44 | async def generate_sql_with_anthropic(user_query, conn_id, session): 45 | """ 46 | Generate SQL using Claude with the server's generate_sql prompt. 
47 | 48 | Args: 49 | user_query (str): The natural language query 50 | conn_id (str): The database connection ID 51 | session: The MCP client session 52 | 53 | Returns: 54 | dict: Dictionary with SQL and explanation 55 | """ 56 | try: 57 | # Use the server's generate_sql prompt 58 | prompt_response = await session.get_prompt('generate_sql', { 59 | 'conn_id': conn_id, 60 | 'nl_query': user_query 61 | }) 62 | 63 | # Process the prompt response 64 | if not hasattr(prompt_response, 'messages') or not prompt_response.messages: 65 | return { 66 | "success": False, 67 | "error": "Invalid prompt response from server" 68 | } 69 | 70 | # Convert MCP messages to format expected by Claude 71 | messages = [] 72 | for msg in prompt_response.messages: 73 | messages.append({ 74 | "role": msg.role, 75 | "content": msg.content.text if hasattr(msg.content, 'text') else str(msg.content) 76 | }) 77 | 78 | # Create the Claude client 79 | client = anthropic.Anthropic(api_key=anthropic_api_key) 80 | 81 | # Get SQL from Claude 82 | response = client.messages.create( 83 | model="claude-3-7-sonnet-20250219", 84 | max_tokens=1024, 85 | messages=messages 86 | ) 87 | 88 | # Extract the SQL from the response 89 | response_text = response.content[0].text 90 | 91 | # Look for SQL in code blocks 92 | sql_query = None 93 | 94 | if "```sql" in response_text and "```" in response_text.split("```sql", 1)[1]: 95 | sql_start = response_text.find("```sql") + 6 96 | remaining_text = response_text[sql_start:] 97 | sql_end = remaining_text.find("```") 98 | 99 | if sql_end > 0: 100 | sql_query = remaining_text[:sql_end].strip() 101 | 102 | # If still no SQL query found, check if the whole response might be SQL 103 | if not sql_query and ("SELECT" in response_text or "WITH" in response_text): 104 | for keyword in ["WITH", "SELECT", "CREATE", "INSERT", "UPDATE", "DELETE"]: 105 | if keyword in response_text: 106 | keyword_pos = response_text.find(keyword) 107 | sql_query = response_text[keyword_pos:].strip() 108 | for end_marker in ["\n\n", "```"]: 109 | if end_marker in sql_query: 110 | sql_query = sql_query[:sql_query.find(end_marker)].strip() 111 | break 112 | 113 | if not sql_query: 114 | return { 115 | "success": False, 116 | "error": "Could not extract SQL from Claude's response", 117 | "response": response_text 118 | } 119 | 120 | return { 121 | "success": True, 122 | "sql": sql_query, 123 | "explanation": "SQL generated using Claude" 124 | } 125 | 126 | except Exception as e: 127 | print(f"Error calling Anthropic API: {e}") 128 | import traceback 129 | print(traceback.format_exc()) 130 | return { 131 | "success": False, 132 | "error": f"Error: {str(e)}" 133 | } 134 | 135 | async def main(): 136 | # Check required environment variables 137 | if not db_url: 138 | print("ERROR: DATABASE_URL environment variable is not set.") 139 | sys.exit(1) 140 | 141 | if not anthropic_api_key: 142 | print("ERROR: ANTHROPIC_API_KEY environment variable is not set.") 143 | sys.exit(1) 144 | 145 | # Check command line arguments 146 | if len(sys.argv) < 2: 147 | print("Usage: python claude_cli.py 'your natural language query'") 148 | sys.exit(1) 149 | 150 | user_query = sys.argv[1] 151 | print(f"Processing query: {user_query}") 152 | 153 | try: 154 | print(f"Connecting to MCP server at {pg_mcp_url}...") 155 | 156 | # Create the SSE client context manager 157 | async with sse_client(url=pg_mcp_url) as streams: 158 | print("SSE streams established, creating session...") 159 | 160 | # Create and initialize the MCP ClientSession 161 | async with 
ClientSession(*streams) as session: 162 | print("Session created, initializing...") 163 | 164 | # Initialize the connection 165 | await session.initialize() 166 | print("Connection initialized!") 167 | 168 | # Use the connect tool to register the connection 169 | print("Registering connection with server...") 170 | try: 171 | connect_result = await session.call_tool( 172 | "connect", 173 | { 174 | "connection_string": db_url 175 | } 176 | ) 177 | 178 | # Extract connection ID 179 | if hasattr(connect_result, 'content') and connect_result.content: 180 | content = connect_result.content[0] 181 | if hasattr(content, 'text'): 182 | result_data = json.loads(content.text) 183 | conn_id = result_data.get('conn_id') 184 | print(f"Connection registered with ID: {conn_id}") 185 | else: 186 | print("Error: Connection response missing text content") 187 | sys.exit(1) 188 | else: 189 | print("Error: Connection response missing content") 190 | sys.exit(1) 191 | except Exception as e: 192 | print(f"Error registering connection: {e}") 193 | sys.exit(1) 194 | 195 | # Generate SQL using Claude with schema context 196 | print("Generating SQL query with Claude...") 197 | response_data = await generate_sql_with_anthropic(user_query, conn_id, session) 198 | 199 | if not response_data["success"]: 200 | print(f"Error: {response_data.get('error', 'Unknown error')}") 201 | if "response" in response_data: 202 | print(f"Claude response: {response_data['response']}") 203 | sys.exit(1) 204 | 205 | # Extract SQL and explanation 206 | sql_query = response_data.get("sql", "") 207 | explanation = response_data.get("explanation", "") 208 | 209 | # Print the results 210 | if explanation: 211 | print(f"\nExplanation:") 212 | print(f"------------") 213 | print(explanation) 214 | 215 | # Original query (as generated by Claude) 216 | print(f"\nGenerated SQL query:") 217 | print(f"------------------") 218 | print(sql_query) 219 | print(f"------------------\n") 220 | 221 | if not sql_query: 222 | print("No SQL query was generated. 
Exiting.") 223 | sys.exit(1) 224 | 225 | # Clean the SQL query before execution 226 | sql_query = clean_sql_query(sql_query) 227 | 228 | # Show the cleaned query 229 | print(f"Cleaned SQL query:") 230 | print(f"------------------") 231 | print(sql_query) 232 | print(f"------------------\n") 233 | 234 | # Execute the generated SQL query 235 | print("Executing SQL query...") 236 | try: 237 | result = await session.call_tool( 238 | "pg_query", 239 | { 240 | "query": sql_query, 241 | "conn_id": conn_id 242 | } 243 | ) 244 | 245 | # Extract and format results 246 | if hasattr(result, 'content') and result.content: 247 | print("\nQuery Results:") 248 | print("==============") 249 | 250 | # Extract multiple text items from content array 251 | query_results = [] 252 | for item in result.content: 253 | if hasattr(item, 'text') and item.text: 254 | try: 255 | # Parse each text item as JSON 256 | row_data = json.loads(item.text) 257 | if isinstance(row_data, list): 258 | query_results.extend(row_data) 259 | else: 260 | query_results.append(row_data) 261 | except json.JSONDecodeError: 262 | print(f"Warning: Could not parse result: {item.text}") 263 | 264 | if query_results: 265 | # Pretty print the results 266 | if isinstance(query_results, list) and len(query_results) > 0: 267 | # Use tabulate to format the table 268 | table = tabulate( 269 | query_results, 270 | headers="keys", 271 | tablefmt="pretty", 272 | numalign="right", 273 | stralign="left" 274 | ) 275 | print(table) 276 | print(f"\nTotal rows: {len(query_results)}") 277 | else: 278 | print(json.dumps(query_results, indent=2)) 279 | else: 280 | print("Query executed successfully but returned no results.") 281 | 282 | else: 283 | print("Query executed but returned no content.") 284 | except Exception as e: 285 | print(f"Error executing SQL query: {type(e).__name__}: {e}") 286 | print(f"Failed query was: {sql_query}") 287 | 288 | # Disconnect when done 289 | print("Disconnecting from database...") 290 | try: 291 | await session.call_tool( 292 | "disconnect", 293 | { 294 | "conn_id": conn_id 295 | } 296 | ) 297 | print("Successfully disconnected.") 298 | except Exception as e: 299 | print(f"Error during disconnect: {e}") 300 | 301 | except Exception as e: 302 | print(f"Error: {type(e).__name__}: {e}") 303 | import traceback 304 | print(traceback.format_exc()) 305 | sys.exit(1) 306 | 307 | if __name__ == "__main__": 308 | asyncio.run(main()) -------------------------------------------------------------------------------- /example-clients/gemini-agent-cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # example-clients/gemini-agent-cli.py 3 | import asyncio 4 | import argparse 5 | import os 6 | import json 7 | import codecs 8 | import sys 9 | import dotenv 10 | from mcp import ClientSession 11 | from mcp.client.sse import sse_client 12 | from tabulate import tabulate 13 | from pydantic_ai import Agent 14 | from pydantic_ai.models.gemini import GeminiModel 15 | from pydantic_ai.providers.google_gla import GoogleGLAProvider 16 | from httpx import AsyncClient 17 | 18 | # Load environment variables 19 | dotenv.load_dotenv() 20 | 21 | # Default values 22 | DEFAULT_MCP_URL = os.getenv("PG_MCP_URL", "http://localhost:8000/sse") 23 | DEFAULT_DB_URL = os.getenv("DATABASE_URL", "") 24 | DEFAULT_API_KEY = os.getenv("GEMINI_API_KEY", "") 25 | 26 | class AgentCLI: 27 | def __init__(self, mcp_url, db_url, api_key): 28 | self.mcp_url = mcp_url 29 | self.db_url = db_url 30 | self.api_key = 
api_key 31 | self.conn_id = None 32 | 33 | custom_http_client = AsyncClient(timeout=30) 34 | model = GeminiModel( 35 | 'gemini-2.0-flash', 36 | provider=GoogleGLAProvider(api_key=api_key, http_client=custom_http_client), 37 | ) 38 | self.agent = Agent(model) 39 | 40 | async def initialize(self): 41 | """Initialize the session and connect to the database.""" 42 | print(f"Connecting to MCP server at {self.mcp_url}...") 43 | async with sse_client(url=self.mcp_url) as streams: 44 | async with ClientSession(*streams) as self.session: 45 | await self.session.initialize() 46 | 47 | # Connect to database 48 | if not self.db_url: 49 | self.db_url = input("Enter PostgreSQL connection URL: ") 50 | 51 | try: 52 | connect_result = await self.session.call_tool( 53 | "connect", 54 | {"connection_string": self.db_url} 55 | ) 56 | 57 | if hasattr(connect_result, 'content') and connect_result.content: 58 | content = connect_result.content[0] 59 | if hasattr(content, 'text'): 60 | result_data = json.loads(content.text) 61 | self.conn_id = result_data.get('conn_id') 62 | print(f"Connected to database with ID: {self.conn_id}") 63 | else: 64 | print("Error: Connection response missing text content") 65 | return 66 | else: 67 | print("Error: Connection response missing content") 68 | return 69 | except Exception as e: 70 | print(f"Error establishing connection to database: {e}") 71 | return 72 | 73 | # Main interaction loop 74 | while True: 75 | try: 76 | await self.process_user_query() 77 | except KeyboardInterrupt: 78 | print("\nDisconnecting from database...") 79 | try: 80 | if self.conn_id: 81 | await self.session.call_tool( 82 | "disconnect", 83 | {"conn_id": self.conn_id} 84 | ) 85 | print("Successfully disconnected.") 86 | except Exception as e: 87 | print(f"Error during disconnect: {e}") 88 | finally: 89 | print("Exiting.") 90 | return 91 | 92 | async def process_user_query(self): 93 | """Process a natural language query from the user.""" 94 | if not self.conn_id: 95 | print("Error: Not connected to database") 96 | return 97 | 98 | # Get the user's natural language query 99 | print("\n--------------------------------------------------") 100 | user_query = input("Enter your question (or 'exit' to quit): ") 101 | 102 | if user_query.lower() in ['exit', 'quit']: 103 | raise KeyboardInterrupt() 104 | 105 | print("Generating SQL query...") 106 | 107 | try: 108 | # Get the prompt from server 109 | prompt_response = await self.session.get_prompt('generate_sql', { 110 | 'conn_id': self.conn_id, 111 | 'nl_query': user_query 112 | }) 113 | 114 | # Extract messages from prompt response 115 | if not hasattr(prompt_response, 'messages') or not prompt_response.messages: 116 | print("Error: Invalid prompt response from server") 117 | return 118 | 119 | # Convert MCP messages to format expected by Gemini 120 | messages = [] 121 | for msg in prompt_response.messages: 122 | messages.append({ 123 | "role": msg.role, 124 | "content": msg.content.text if hasattr(msg.content, 'text') else str(msg.content) 125 | }) 126 | 127 | # Use the agent with the formatted messages 128 | response = await self.agent.run(str(messages)) 129 | 130 | # Access the response content 131 | if hasattr(response, 'content'): 132 | response_text = response.content 133 | else: 134 | response_text = str(response) 135 | 136 | # Extract SQL from response 137 | sql_query = None 138 | 139 | # Look for SQL in code blocks 140 | if "```sql" in response_text and "```" in response_text.split("```sql", 1)[1]: 141 | sql_start = response_text.find("```sql") + 6 142 
| remaining_text = response_text[sql_start:] 143 | sql_end = remaining_text.find("```") 144 | 145 | if sql_end > 0: 146 | sql_query = remaining_text[:sql_end].strip() 147 | 148 | # If still no SQL query found, check if the whole response might be SQL 149 | if not sql_query and ("SELECT" in response_text or "WITH" in response_text): 150 | for keyword in ["WITH", "SELECT", "CREATE", "INSERT", "UPDATE", "DELETE"]: 151 | if keyword in response_text: 152 | keyword_pos = response_text.find(keyword) 153 | sql_query = response_text[keyword_pos:].strip() 154 | for end_marker in ["\n\n", "```"]: 155 | if end_marker in sql_query: 156 | sql_query = sql_query[:sql_query.find(end_marker)].strip() 157 | break 158 | 159 | if not sql_query: 160 | print("\nCould not extract SQL from the response.") 161 | print("Response:", response_text[:100] + "..." if len(response_text) > 100 else response_text) 162 | return 163 | 164 | # Add trailing semicolon if missing 165 | sql_query = sql_query.strip() 166 | if not sql_query.endswith(';'): 167 | sql_query = sql_query + ';' 168 | 169 | # Handle escaped characters 170 | unescaped_sql_query = codecs.decode(sql_query, 'unicode_escape') 171 | 172 | # Display and confirm 173 | print("\nGenerated SQL query:") 174 | print(unescaped_sql_query) 175 | 176 | execute = input("\nDo you want to execute this query? (y/n): ") 177 | if execute.lower() != 'y': 178 | return 179 | 180 | # Execute the query 181 | print("Executing query...") 182 | result = await self.session.call_tool( 183 | "pg_query", 184 | { 185 | "query": unescaped_sql_query, 186 | "conn_id": self.conn_id 187 | } 188 | ) 189 | 190 | # Process results 191 | if hasattr(result, 'content') and result.content: 192 | query_results = [] 193 | 194 | # Extract all content items and parse the JSON 195 | for content_item in result.content: 196 | if hasattr(content_item, 'text'): 197 | try: 198 | # Parse each row from JSON 199 | row_data = json.loads(content_item.text) 200 | if isinstance(row_data, list): 201 | query_results.extend(row_data) 202 | else: 203 | query_results.append(row_data) 204 | except json.JSONDecodeError: 205 | print(f"Error parsing result item: {content_item.text[:100]}") 206 | 207 | # Display the formatted results 208 | if query_results: 209 | print("\nQuery Results:") 210 | table = tabulate( 211 | query_results, 212 | headers="keys", 213 | tablefmt="pretty" 214 | ) 215 | print(table) 216 | print(f"\nTotal rows: {len(query_results)}") 217 | else: 218 | print("\nQuery executed successfully but returned no results.") 219 | else: 220 | print("Query executed but no content returned") 221 | 222 | except Exception as e: 223 | print(f"Error: {e}") 224 | import traceback 225 | traceback.print_exc() 226 | 227 | async def main(): 228 | parser = argparse.ArgumentParser(description="Natural Language to SQL CLI for PG-MCP") 229 | parser.add_argument("--mcp-url", default=DEFAULT_MCP_URL, help="MCP server URL") 230 | parser.add_argument("--db-url", default=DEFAULT_DB_URL, help="PostgreSQL connection URL") 231 | parser.add_argument("--api-key", default=DEFAULT_API_KEY, help="Gemini API key") 232 | 233 | args = parser.parse_args() 234 | 235 | if not args.api_key: 236 | print("Error: Gemini API key is required") 237 | print("Set GEMINI_API_KEY in .env file or provide via --api-key argument") 238 | sys.exit(1) 239 | 240 | agent = AgentCLI(args.mcp_url, args.db_url, args.api_key) 241 | await agent.initialize() 242 | 243 | if __name__ == "__main__": 244 | asyncio.run(main()) 
-------------------------------------------------------------------------------- /get_schema.py: -------------------------------------------------------------------------------- 1 | # get_schema.py 2 | import asyncio 3 | import httpx 4 | import json 5 | import sys 6 | from mcp import ClientSession 7 | from mcp.client.sse import sse_client 8 | 9 | async def run(connection_string: str | None): 10 | """Download a comprhensive database schema from the MCP server.""" 11 | # Assuming your server is running on localhost:8000 12 | server_url = "http://localhost:8000/sse" 13 | 14 | try: 15 | print(f"Connecting to MCP server at {server_url}...") 16 | if connection_string: 17 | # Clean and sanitize the connection string 18 | clean_connection = connection_string.strip() 19 | # Only show a small part of the connection string for security 20 | masked_conn_string = clean_connection[:10] + "..." if len(clean_connection) > 10 else clean_connection 21 | print(f"Using database connection: {masked_conn_string}") 22 | 23 | # Create the SSE client context manager 24 | async with sse_client(url=server_url) as streams: 25 | print("SSE streams established, creating session...") 26 | 27 | # Create and initialize the MCP ClientSession 28 | async with ClientSession(*streams) as session: 29 | print("Session created, initializing...") 30 | # Initialize the connection 31 | await session.initialize() 32 | print("Connection initialized!") 33 | 34 | tools_response = await session.list_tools() 35 | tools = tools_response.tools 36 | 37 | if connection_string: 38 | # Check if required tools are available 39 | has_connect = any(tool.name == 'connect' for tool in tools) 40 | 41 | if not has_connect: 42 | print("\nERROR: 'connect' tool is not available on the server") 43 | return 44 | 45 | try: 46 | # Use the cleaned connection string 47 | clean_connection = connection_string.strip() 48 | 49 | # First, register the connection to get a conn_id 50 | print("\nRegistering connection with 'connect' tool...") 51 | connect_result = await session.call_tool( 52 | "connect", 53 | { 54 | "connection_string": clean_connection 55 | } 56 | ) 57 | 58 | # Extract conn_id from the response 59 | conn_id = None 60 | if hasattr(connect_result, 'content') and connect_result.content: 61 | content = connect_result.content[0] 62 | if hasattr(content, 'text'): 63 | try: 64 | result_data = json.loads(content.text) 65 | conn_id = result_data.get('conn_id') 66 | print(f"Successfully connected with connection ID: {conn_id}") 67 | except json.JSONDecodeError: 68 | print(f"Error parsing connect result: {content.text[:100]}") 69 | 70 | if not conn_id: 71 | print("Failed to get connection ID from connect tool") 72 | return 73 | 74 | # Connect to the new comprehensive schema resource 75 | print("\nConnecting to the comprehensive schema resource...") 76 | schema_resource = f"pgmcp://{conn_id}/" 77 | schema_response = await session.read_resource(schema_resource) 78 | 79 | # Process schema response 80 | response_content = None 81 | if hasattr(schema_response, 'content') and schema_response.content: 82 | response_content = schema_response.content 83 | elif hasattr(schema_response, 'contents') and schema_response.contents: 84 | response_content = schema_response.contents 85 | 86 | if response_content: 87 | content_item = response_content[0] 88 | if hasattr(content_item, 'text'): 89 | try: 90 | schema_data = json.loads(content_item.text) 91 | schemas = schema_data.get('schemas', []) 92 | 93 | print(f"Successfully retrieved schema for {len(schemas)} schemas") 94 | 95 | 
# Save the schema to a file for inspection 96 | output_file = f"{conn_id}.json" 97 | with open(output_file, 'w') as f: 98 | json.dump(schema_data, f, indent=2) 99 | print("\nComprehensive Database Schema saved to file") 100 | 101 | except json.JSONDecodeError: 102 | print(f"Error parsing schema response: {content_item.text[:100]}") 103 | else: 104 | print("Schema response content has no text attribute") 105 | else: 106 | print("Schema response has no content") 107 | 108 | # Test disconnect tool if available 109 | has_disconnect = any(tool.name == 'disconnect' for tool in tools) 110 | if has_disconnect and conn_id: 111 | print("\nDisconnecting...") 112 | disconnect_result = await session.call_tool( 113 | "disconnect", 114 | { 115 | "conn_id": conn_id 116 | } 117 | ) 118 | 119 | if hasattr(disconnect_result, 'content') and disconnect_result.content: 120 | content = disconnect_result.content[0] 121 | if hasattr(content, 'text'): 122 | try: 123 | result_data = json.loads(content.text) 124 | success = result_data.get('success', False) 125 | if success: 126 | print(f"Successfully disconnected connection {conn_id}") 127 | else: 128 | error = result_data.get('error', 'Unknown error') 129 | print(f"Failed to disconnect: {error}") 130 | except json.JSONDecodeError: 131 | print(f"Error parsing disconnect result: {content.text[:100]}") 132 | else: 133 | print("Disconnect call completed but no result returned") 134 | 135 | except Exception as e: 136 | print(f"Error during connection tests: {e}") 137 | else: 138 | print("\nNo connection string provided, skipping database tests") 139 | 140 | except httpx.HTTPStatusError as e: 141 | print(f"HTTP Error: {e}") 142 | print(f"Status code: {e.response.status_code}") 143 | print(f"Response body: {e.response.text}") 144 | except httpx.ConnectError: 145 | print(f"Connection Error: Could not connect to server at {server_url}") 146 | print("Make sure the server is running and the URL is correct") 147 | except Exception as e: 148 | print(f"Error: {type(e).__name__}: {e}") 149 | 150 | if __name__ == "__main__": 151 | # Get database connection string from command line argument 152 | connection_string = sys.argv[1] if len(sys.argv) > 1 else None 153 | asyncio.run(run(connection_string)) -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pg-mcp" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.13" 7 | dependencies = [ 8 | "anthropic>=0.49.0", 9 | "asyncpg>=0.30.0", 10 | "jinja2>=3.1.6", 11 | "mcp[cli]>=1.5.0", 12 | "pydantic-ai>=0.0.46", 13 | "sqlglot>=26.16.2", 14 | "tabulate>=0.9.0", 15 | ] 16 | -------------------------------------------------------------------------------- /server/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stuzero/pg-mcp-server/199a81bcefa4020335d33bdfd84d16299aaeaf23/server/__init__.py -------------------------------------------------------------------------------- /server/app.py: -------------------------------------------------------------------------------- 1 | # server/app.py 2 | import os 3 | from server.logging_config import configure_logging, get_logger, configure_uvicorn_logging 4 | 5 | # Configure logging first thing to capture all subsequent log messages 6 | log_level = os.environ.get("LOG_LEVEL", "DEBUG") 7 | 
configure_logging(level=log_level) 8 | logger = get_logger("app") 9 | 10 | # Import MCP instance and other components after logging is configured 11 | from server.config import mcp, global_db 12 | 13 | # Import registration functions 14 | from server.resources.schema import register_schema_resources 15 | from server.resources.data import register_data_resources 16 | from server.resources.extensions import register_extension_resources 17 | from server.tools.connection import register_connection_tools 18 | from server.tools.query import register_query_tools 19 | from server.tools.viz import register_viz_tools 20 | from server.prompts.natural_language import register_natural_language_prompts 21 | from server.prompts.data_visualization import register_data_visualization_prompts 22 | 23 | # Register tools and resources with the MCP server 24 | logger.info("Registering resources and tools") 25 | register_schema_resources() # Schema-related resources (schemas, tables, columns) 26 | register_extension_resources() 27 | register_data_resources() # Data-related resources (sample, rowcount, etc.) 28 | register_connection_tools() # Connection management tools 29 | register_query_tools() 30 | register_viz_tools() # Visualization tools 31 | register_natural_language_prompts() # Natural language to SQL prompts 32 | register_data_visualization_prompts() # Data visualization prompts 33 | 34 | 35 | from contextlib import asynccontextmanager 36 | from starlette.applications import Starlette 37 | from starlette.routing import Mount 38 | import uvicorn 39 | 40 | @asynccontextmanager 41 | async def starlette_lifespan(app): 42 | logger.info("Starlette application starting up") 43 | yield 44 | logger.info("Starlette application shutting down, closing all database connections") 45 | await global_db.close() 46 | 47 | if __name__ == "__main__": 48 | logger.info("Starting MCP server with SSE transport") 49 | app = Starlette( 50 | routes=[Mount('/', app=mcp.sse_app())], 51 | lifespan=starlette_lifespan 52 | ) 53 | 54 | # Configure Uvicorn with our logging setup 55 | uvicorn_log_config = configure_uvicorn_logging(log_level) 56 | 57 | # Use our configured log level for Uvicorn 58 | uvicorn.run( 59 | app, 60 | host="0.0.0.0", 61 | port=8000, 62 | log_level=log_level.lower(), 63 | log_config=uvicorn_log_config 64 | ) -------------------------------------------------------------------------------- /server/config.py: -------------------------------------------------------------------------------- 1 | # server/config.py 2 | from mcp.server.fastmcp import FastMCP 3 | from contextlib import asynccontextmanager 4 | from collections.abc import AsyncIterator 5 | from server.database import Database 6 | from server.logging_config import configure_logging, get_logger 7 | 8 | # Initialize logging with our custom configuration 9 | logger = get_logger("instance") 10 | 11 | global_db = Database() 12 | logger.info("Global database manager initialized") 13 | 14 | @asynccontextmanager 15 | async def app_lifespan(app: FastMCP) -> AsyncIterator[dict]: 16 | """Manage application lifecycle.""" 17 | mcp.state = {"db": global_db} 18 | logger.info("Application startup - using global database manager") 19 | 20 | try: 21 | yield {"db": global_db} 22 | finally: 23 | # Don't close connections on individual session end 24 | pass 25 | 26 | # Create the MCP instance 27 | mcp = FastMCP( 28 | "pg-mcp-server", 29 | debug=True, 30 | lifespan=app_lifespan, 31 | dependencies=["asyncpg", "mcp"] 32 | ) 
-------------------------------------------------------------------------------- /server/database.py: -------------------------------------------------------------------------------- 1 | # server/database.py 2 | import uuid 3 | import urllib.parse 4 | import asyncpg 5 | from contextlib import asynccontextmanager 6 | from mcp.server.fastmcp.utilities.logging import get_logger 7 | 8 | logger = get_logger("pg-mcp.database") 9 | 10 | class Database: 11 | def __init__(self): 12 | """Initialize the database manager with no default connections.""" 13 | self._pools = {} # Dictionary to store connection pools by connection ID 14 | self._connection_map = {} # Map connection IDs to actual connection strings 15 | self._reverse_map = {} # Map connection strings to their IDs 16 | 17 | def postgres_connection_to_uuid(self, connection_string, namespace=uuid.NAMESPACE_URL): 18 | """ 19 | Convert a PostgreSQL connection string into a deterministic Version 5 UUID. 20 | Includes both connection credentials (netloc) and database name (path). 21 | 22 | Args: 23 | connection_string: Full PostgreSQL connection string 24 | namespace: UUID namespace (default is URL namespace) 25 | 26 | Returns: 27 | str: UUID representing the connection 28 | """ 29 | # Parse the connection string 30 | parsed = urllib.parse.urlparse(connection_string) 31 | 32 | # Extract the netloc (user:password@host:port) and path (database name) 33 | # The path typically starts with a slash, so we strip it 34 | connection_id_string = parsed.netloc + parsed.path 35 | 36 | # Create a Version 5 UUID (SHA-1 based) 37 | result_uuid = uuid.uuid5(namespace, connection_id_string) 38 | 39 | return str(result_uuid) 40 | 41 | 42 | def register_connection(self, connection_string): 43 | """ 44 | Register a connection string and return its UUID identifier. 
45 | 46 | Args: 47 | connection_string: PostgreSQL connection string 48 | 49 | Returns: 50 | str: UUID identifier for this connection 51 | """ 52 | if not connection_string.startswith("postgresql://"): 53 | connection_string = f"postgresql://{connection_string}" 54 | 55 | # Check if we already have this connection registered 56 | if connection_string in self._reverse_map: 57 | return self._reverse_map[connection_string] 58 | 59 | # Generate a new UUID 60 | conn_id = self.postgres_connection_to_uuid(connection_string) 61 | 62 | # Store mappings in both directions 63 | self._connection_map[conn_id] = connection_string 64 | self._reverse_map[connection_string] = conn_id 65 | 66 | logger.info(f"Registered new connection with ID {conn_id}") 67 | 68 | return conn_id 69 | 70 | def get_connection_string(self, conn_id): 71 | """Get the actual connection string for a connection ID.""" 72 | if conn_id not in self._connection_map: 73 | raise ValueError(f"Unknown connection ID: {conn_id}") 74 | 75 | return self._connection_map[conn_id] 76 | 77 | async def initialize(self, conn_id): 78 | """Initialize a connection pool for the given connection ID.""" 79 | if not conn_id: 80 | raise ValueError("Connection ID is required") 81 | 82 | if conn_id not in self._pools: 83 | # Get the actual connection string 84 | connection_string = self.get_connection_string(conn_id) 85 | 86 | logger.info(f"Creating new database connection pool for connection ID {conn_id}") 87 | self._pools[conn_id] = await asyncpg.create_pool( 88 | connection_string, 89 | min_size=2, 90 | max_size=10, 91 | command_timeout=60.0, 92 | # Read-only mode 93 | server_settings={"default_transaction_read_only": "true"} 94 | ) 95 | 96 | return self 97 | 98 | @asynccontextmanager 99 | async def get_connection(self, conn_id): 100 | """Get a database connection from the pool for the given connection ID.""" 101 | if not conn_id: 102 | raise ValueError("Connection ID is required") 103 | 104 | if conn_id not in self._pools: 105 | await self.initialize(conn_id) 106 | 107 | async with self._pools[conn_id].acquire() as conn: 108 | yield conn 109 | 110 | async def close(self, conn_id=None): 111 | """ 112 | Close a specific or all database connection pools. 113 | 114 | Args: 115 | conn_id: If provided, close only this specific connection pool. 116 | If None, close all connection pools. 
117 | """ 118 | if conn_id: 119 | if conn_id in self._pools: 120 | logger.info(f"Closing database connection pool for connection ID {conn_id}") 121 | await self._pools[conn_id].close() 122 | del self._pools[conn_id] 123 | else: 124 | # Close all connection pools 125 | logger.info("Closing all database connection pools") 126 | for id, pool in list(self._pools.items()): 127 | logger.info(f"Closing connection pool for ID {id}") 128 | await pool.close() 129 | del self._pools[id] -------------------------------------------------------------------------------- /server/logging_config.py: -------------------------------------------------------------------------------- 1 | # server/logging_config.py 2 | import logging 3 | import sys 4 | import os 5 | import re 6 | from datetime import datetime 7 | import logging.handlers 8 | 9 | from rich.logging import RichHandler 10 | from rich.console import Console 11 | from rich.theme import Theme 12 | from rich.highlighter import RegexHighlighter 13 | from rich.style import Style 14 | 15 | # Custom highlighter for important patterns 16 | class MCPHighlighter(RegexHighlighter): 17 | """Highlights important patterns in log messages.""" 18 | 19 | # Define regex patterns and their styles 20 | highlights = [ 21 | # Session IDs - bright magenta 22 | r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', 23 | # HTTP Status codes - green for success 24 | r'(200 OK|201 Created|204 No Content)', 25 | # Key phrases - bright blue 26 | r'(Created new session|Starting SSE|Yielding read and write streams|Sent endpoint event)', 27 | ] 28 | 29 | base_style = Style() 30 | session_id_style = Style(color="bright_magenta") 31 | http_ok_style = Style(color="bright_green") 32 | key_phrase_style = Style(color="bright_blue") 33 | 34 | def highlight(self, text): 35 | """Apply highlighting to text.""" 36 | text = super().highlight(text) 37 | 38 | # Apply custom highlighting for each pattern 39 | text = re.sub( 40 | r'([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})', 41 | lambda m: f"[bright_magenta]{m.group(1)}[/bright_magenta]", 42 | text 43 | ) 44 | 45 | text = re.sub( 46 | r'(200 OK|201 Created|204 No Content)', 47 | lambda m: f"[bright_green]{m.group(1)}[/bright_green]", 48 | text 49 | ) 50 | 51 | text = re.sub( 52 | r'(Created new session|Starting SSE|Yielding read and write streams|Sent endpoint event)', 53 | lambda m: f"[bright_blue]{m.group(1)}[/bright_blue]", 54 | text 55 | ) 56 | 57 | return text 58 | 59 | # Create a custom theme for Rich 60 | custom_theme = Theme({ 61 | "info": "green", 62 | "warning": "yellow", 63 | "error": "bold red", 64 | "debug": "cyan", 65 | "server.sse": "bright_blue", 66 | "lowlevel.server": "bright_cyan", 67 | "resources": "bright_green", 68 | "tools": "bright_magenta", 69 | "asyncio": "bright_yellow", 70 | }) 71 | 72 | def get_component_style(name): 73 | """Get the style for a component based on its name.""" 74 | if "server.sse" in name: 75 | return "bright_blue" 76 | elif "lowlevel.server" in name: 77 | return "bright_cyan" 78 | elif "resources" in name: 79 | return "bright_green" 80 | elif "tools" in name: 81 | return "bright_magenta" 82 | elif "asyncio" in name: 83 | return "bright_yellow" 84 | else: 85 | return "bright_black" 86 | 87 | class MCPLogFormatter(logging.Formatter): 88 | """Formatter for non-Rich log handlers that maintains consistent format.""" 89 | 90 | def format(self, record): 91 | # Extract component info from the original record 92 | name_parts = record.name.split('.') 93 | 94 | # Determine component 
95 | if len(name_parts) > 1: 96 | component = record.name 97 | else: 98 | component = record.name 99 | 100 | # Add component to record 101 | record.component = f"[{component}]" 102 | 103 | # Get source file reference 104 | source_info = "" 105 | if hasattr(record, 'pathname') and record.pathname: 106 | source_file = os.path.basename(record.pathname) 107 | source_info = f"({source_file}:{record.lineno})" 108 | record.source_info = source_info 109 | 110 | # Format using the base formatter 111 | return super().format(record) 112 | 113 | def configure_logging(level="INFO", log_file=None): 114 | """ 115 | Configure logging with Rich formatting for the terminal 116 | and regular formatting for log files. 117 | 118 | Args: 119 | level: The log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) 120 | log_file: Optional path to a log file 121 | """ 122 | # Get log level from environment if available 123 | env_level = os.environ.get("LOG_LEVEL", level) 124 | numeric_level = getattr(logging, env_level.upper(), logging.INFO) 125 | 126 | # Configure root logger 127 | root_logger = logging.getLogger() 128 | root_logger.setLevel(numeric_level) 129 | 130 | # Remove existing handlers to prevent duplication 131 | for handler in root_logger.handlers[:]: 132 | root_logger.removeHandler(handler) 133 | 134 | # Create Rich console with custom highlighting 135 | console = Console(theme=custom_theme, highlighter=MCPHighlighter()) 136 | 137 | # Rich handler for console output 138 | rich_handler = RichHandler( 139 | console=console, 140 | show_time=True, 141 | show_level=True, 142 | show_path=True, # We'll show the path in our format 143 | enable_link_path=True, 144 | markup=True, 145 | omit_repeated_times=False, 146 | rich_tracebacks=True, 147 | tracebacks_show_locals=True, 148 | log_time_format="%Y-%m-%d %H:%M:%S.%f" 149 | ) 150 | 151 | # Set the format for Rich handler (minimal since Rich adds its own formatting) 152 | rich_format = "%(message)s" 153 | rich_handler.setFormatter(logging.Formatter(rich_format)) 154 | rich_handler.setLevel(numeric_level) 155 | root_logger.addHandler(rich_handler) 156 | 157 | # Add file handler if log file is specified 158 | if log_file: 159 | # Ensure log directory exists 160 | log_dir = os.path.dirname(log_file) 161 | if log_dir and not os.path.exists(log_dir): 162 | os.makedirs(log_dir) 163 | 164 | # Define format for file logs (no color codes) 165 | file_format = "%(asctime)s | %(levelname)s %(component)s | %(message)s %(source_info)s" 166 | 167 | # Create and add rotating file handler 168 | file_handler = logging.handlers.RotatingFileHandler( 169 | log_file, 170 | maxBytes=10 * 1024 * 1024, # 10 MB 171 | backupCount=5, 172 | encoding='utf-8' 173 | ) 174 | file_handler.setFormatter(MCPLogFormatter(file_format)) 175 | file_handler.setLevel(numeric_level) 176 | root_logger.addHandler(file_handler) 177 | 178 | # Create a logger for the pg-mcp application 179 | app_logger = logging.getLogger("pg-mcp") 180 | app_logger.setLevel(numeric_level) 181 | 182 | # Log startup message 183 | app_logger.info(f"Logging configured with level {env_level}") 184 | 185 | return root_logger 186 | 187 | def get_logger(name): 188 | """ 189 | Get a logger with the given name, preserving the original naming scheme. 190 | 191 | Args: 192 | name: Logger name 193 | 194 | Returns: 195 | A configured logger instance 196 | """ 197 | return logging.getLogger(name) 198 | 199 | def configure_uvicorn_logging(log_level="info"): 200 | """ 201 | Configure Uvicorn's logging to match our style. 
202 | 203 | Args: 204 | log_level: Log level for Uvicorn 205 | 206 | Returns: 207 | Dictionary with Uvicorn log config 208 | """ 209 | # Map our log level format to Uvicorn's 210 | level = log_level.upper() 211 | if level == "DEBUG": 212 | log_level = "debug" 213 | elif level == "INFO": 214 | log_level = "info" 215 | elif level == "WARNING": 216 | log_level = "warning" 217 | elif level == "ERROR": 218 | log_level = "error" 219 | elif level == "CRITICAL": 220 | log_level = "critical" 221 | 222 | # Use default Uvicorn logging config to avoid conflicts 223 | return { 224 | "version": 1, 225 | "disable_existing_loggers": False, 226 | "log_level": log_level, 227 | } -------------------------------------------------------------------------------- /server/prompts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stuzero/pg-mcp-server/199a81bcefa4020335d33bdfd84d16299aaeaf23/server/prompts/__init__.py -------------------------------------------------------------------------------- /server/prompts/data_visualization.py: -------------------------------------------------------------------------------- 1 | # server/prompts/data_visualization.py 2 | import importlib.resources 3 | import jinja2 4 | from server.config import mcp 5 | from server.logging_config import get_logger 6 | from mcp.server.fastmcp.prompts import base 7 | from server.tools.viz import get_query_metadata 8 | 9 | logger = get_logger("pg-mcp.prompts.data_visualization") 10 | 11 | # Set up Jinja2 template environment using importlib.resources 12 | template_env = jinja2.Environment( 13 | loader=jinja2.FunctionLoader(lambda name: 14 | importlib.resources.read_text('server.prompts.templates', name) 15 | ) 16 | ) 17 | 18 | def register_data_visualization_prompts(): 19 | """Register data visualization prompts with the MCP server.""" 20 | logger.debug("Registering data visualization prompts") 21 | 22 | @mcp.prompt() 23 | async def generate_vega(conn_id: str, nl_query: str, sql_query: str): 24 | """ 25 | Prompt to guide AI agents in generating appropriate Vega-Lite visualizations 26 | based on SQL query results, metadata, and database context. 
27 | 28 | Args: 29 | conn_id: The connection ID for the database 30 | nl_query: The original natural language query 31 | sql_query: The SQL query to visualize 32 | 33 | Returns: 34 | A prompt message that will guide the AI in generating a Vega-Lite specification 35 | """ 36 | # Generate query metadata directly using the updated function 37 | logger.debug(f"Generating query metadata") 38 | query_metadata = await get_query_metadata(conn_id, sql_query) 39 | logger.debug(f"Query metadata generated successfully") 40 | 41 | # Get database information for context 42 | database_resource = f"pgmcp://{conn_id}/" 43 | database_response = await mcp.read_resource(database_resource) 44 | 45 | database_info = database_response[0].content if database_response else "{}" 46 | 47 | # Render the prompt template 48 | prompt_template = template_env.get_template("generate_vega.md.jinja2") 49 | prompt_text = prompt_template.render( 50 | database_info=database_info, 51 | nl_query=nl_query, 52 | sql_query=sql_query, 53 | query_metadata=query_metadata 54 | ) 55 | 56 | return [base.UserMessage(prompt_text)] -------------------------------------------------------------------------------- /server/prompts/natural_language.py: -------------------------------------------------------------------------------- 1 | # server/prompts/natural_language.py 2 | import importlib.resources 3 | import jinja2 4 | from server.config import mcp 5 | from server.logging_config import get_logger 6 | from mcp.server.fastmcp.prompts import base 7 | 8 | logger = get_logger("pg-mcp.prompts.natural_language") 9 | 10 | # Set up Jinja2 template environment using importlib.resources 11 | template_env = jinja2.Environment( 12 | loader=jinja2.FunctionLoader(lambda name: 13 | importlib.resources.read_text('server.prompts.templates', name) 14 | ) 15 | ) 16 | 17 | def register_natural_language_prompts(): 18 | """Register prompts with the MCP server.""" 19 | logger.debug("Registering natural language to SQL prompts") 20 | 21 | @mcp.prompt() 22 | async def generate_sql(conn_id: str, nl_query: str): 23 | """ 24 | Prompt to guide AI agents in converting natural language queries to SQL with PostgreSQL. 25 | 26 | Args: 27 | conn_id: The connection ID for the database 28 | nl_query: The natural language query to convert to SQL 29 | """ 30 | # Get database information 31 | database_resource = f"pgmcp://{conn_id}/" 32 | database_response = await mcp.read_resource(database_resource) 33 | 34 | database_info = database_response[0].content if database_response else "{}" 35 | 36 | # Render the prompt template 37 | prompt_template = template_env.get_template("generate_sql.md.jinja2") 38 | prompt_text = prompt_template.render( 39 | database_info=database_info, 40 | nl_query=nl_query 41 | ) 42 | 43 | return [base.UserMessage(prompt_text)] 44 | 45 | @mcp.prompt() 46 | async def validate_nl(conn_id: str, nl_query: str): 47 | """ 48 | Prompt to determine if the user's query is answerable by the database 49 | and that a query can be generated. User input is evaluated on 50 | clarity/vagueness and also relevancy to the schema. 
51 | 52 | Args: 53 | conn_id: The connection ID for the database 54 | nl_query: The natural language query to validate 55 | """ 56 | # Get database information 57 | database_resource = f"pgmcp://{conn_id}/" 58 | database_response = await mcp.read_resource(database_resource) 59 | 60 | database_info = database_response[0].content if database_response else "{}" 61 | 62 | # Render the prompt template 63 | prompt_template = template_env.get_template("validate_nl.md.jinja2") 64 | prompt_text = prompt_template.render( 65 | database_info=database_info, 66 | nl_query=nl_query 67 | ) 68 | 69 | return [base.UserMessage(prompt_text)] 70 | 71 | @mcp.prompt() 72 | async def justify_sql(conn_id: str, nl_query: str, sql_query: str): 73 | """ 74 | Prompt to evaluate if a SQL query correctly answers a natural language question 75 | and provide an explanation of how the query works. 76 | 77 | Args: 78 | conn_id: The connection ID for the database 79 | nl_query: The original natural language query 80 | sql_query: The SQL query to evaluate and explain 81 | """ 82 | # Get database information 83 | database_resource = f"pgmcp://{conn_id}/" 84 | database_response = await mcp.read_resource(database_resource) 85 | 86 | database_info = database_response[0].content if database_response else "{}" 87 | 88 | # Render the prompt template 89 | prompt_template = template_env.get_template("justify_sql.md.jinja2") 90 | prompt_text = prompt_template.render( 91 | database_info=database_info, 92 | nl_query=nl_query, 93 | sql_query=sql_query 94 | ) 95 | 96 | return [base.UserMessage(prompt_text)] -------------------------------------------------------------------------------- /server/prompts/templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stuzero/pg-mcp-server/199a81bcefa4020335d33bdfd84d16299aaeaf23/server/prompts/templates/__init__.py -------------------------------------------------------------------------------- /server/prompts/templates/generate_sql.md.jinja2: -------------------------------------------------------------------------------- 1 | You are an expert PostgreSQL database query assistant. Your task is to: 2 | Analyze the database schema information provided 3 | Convert natural language questions into optimized PostgreSQL SQL queries 4 | Use appropriate JOINs, WHERE clauses, and aggregations based on the schema 5 | Database Information 6 | ```json 7 | {{database_info}} 8 | ``` 9 | Response Format 10 | Your response must contain ONLY the PostgreSQL SQL query inside sql code blocks. 11 | Do not include any explanations, analysis, or comments outside of the SQL code block. 12 | Keep your query concise and focused on answering the specific question. 13 | Example response format: 14 | ```sql 15 | SELECT column1, column2 16 | FROM table1 17 | JOIN table2 ON table1.id = table2.id 18 | WHERE condition = true; 19 | ``` 20 | Query Writing Guidelines 21 | 22 | Start by examining the database schema to understand table relationships 23 | Use explicit column names rather than SELECT * 24 | Use appropriate JOIN types (INNER, LEFT, RIGHT) based on the relationship 25 | For filtering, use appropriate operators and functions (=, LIKE, IN, etc.) 
26 | Use CTEs (WITH clauses) for complex queries to improve readability 27 | Include LIMIT clauses for queries that might return large result sets 28 | Prefer schema-qualified table names (schema_name.table_name) 29 | For performance, consider using indexed columns in WHERE clauses 30 | End all SQL queries with a semicolon 31 | Make sure your SQL query fits within a single response (don't create excessively long queries) 32 | 33 | PostgreSQL-Specific Features 34 | 35 | Use jsonb_* functions for JSON data handling 36 | Consider using LATERAL joins for row-based subqueries 37 | Use array functions (unnest, array_agg) for array operations 38 | Use window functions (OVER, RANK, etc.) for analytic queries 39 | For full-text search, utilize tsvector, tsquery, and indexing 40 | 41 | Natural Language Query 42 | "{{nl_query}}" -------------------------------------------------------------------------------- /server/prompts/templates/generate_vega.md.jinja2: -------------------------------------------------------------------------------- 1 | You are an expert in data visualization who will create an appropriate Vega-Lite specification based on SQL query results and database context. 2 | 3 | ## Database Schema Information 4 | ```json 5 | {{database_info}} 6 | ``` 7 | 8 | ## Natural Language Query 9 | "{{nl_query}}" 10 | 11 | ## SQL Query 12 | ```sql 13 | {{sql_query}} 14 | ``` 15 | 16 | ## Query Metadata 17 | ```json 18 | {{query_metadata}} 19 | ``` 20 | 21 | ## Your Task 22 | 23 | Generate a Vega-Lite specification that creates an appropriate visualization for the data returned by this SQL query. The visualization should: 24 | 25 | 1. Effectively communicate the insights sought in the natural language query 26 | 2. Use appropriate chart types based on the field data types (temporal, quantitative, nominal) 27 | 3. Include proper axis labels, chart title, and other annotations 28 | 4. Use a pleasing and accessible color scheme 29 | 5. Be optimized for the data size indicated in the metadata 30 | 6. Leverage appropriate Vega-Lite features for the data structure 31 | 32 | ## Response Format 33 | 34 | **IMPORTANT**: Your response must be a valid JSON object inside a code block, in the following format: 35 | 36 | ```json 37 | { 38 | "vegaLiteSpec": { 39 | "$schema": "https://vega.github.io/schema/vega-lite/v6.json", 40 | "data": {"name": "results"}, 41 | "mark": "CHART_TYPE", 42 | "encoding": { 43 | // Your encoding properties here 44 | }, 45 | "title": "CHART_TITLE" 46 | }, 47 | "explanation": "Brief explanation of why you chose this visualization type", 48 | "limitations": "Any limitations or assumptions made" 49 | } 50 | ``` 51 | 52 | Do NOT include any explanatory text, comments, or markdown outside of this JSON code block. The JSON must be properly formatted with double quotes around all property names and string values. Do not use single quotes or unquoted property names. 
53 | 54 | ## Guidelines for Chart Type Selection 55 | 56 | - **Time Series Data**: Use line charts for temporal data trends 57 | - **Categorical Comparisons**: Use bar charts for comparing values across categories 58 | - **Distributions**: Use histograms or density plots for distributions 59 | - **Part-to-Whole Relationships**: Use pie or donut charts (only for small number of categories) 60 | - **Correlations**: Use scatter plots for relationships between numeric variables 61 | - **Geographic Data**: Use map visualizations for spatial data 62 | - **Multi-dimensional Data**: Consider faceted views or small multiples 63 | 64 | ## Special Considerations 65 | 66 | - For large datasets (>1000 rows), use aggregation or sampling 67 | - When there are many fields, prioritize those most relevant to the query 68 | - For GROUP BY queries, ensure the visualization reflects the grouped structure 69 | - For queries with multiple measures, consider compound visualizations 70 | - When date ranges are large, use appropriate temporal units (days, months, years) 71 | - Ensure the visualization is colorblind-friendly 72 | 73 | ## Examples 74 | 75 | A basic bar chart visualizing sales by category: 76 | ```json 77 | { 78 | "vegaLiteSpec": { 79 | "$schema": "https://vega.github.io/schema/vega-lite/v6.json", 80 | "data": {"name": "results"}, 81 | "mark": "bar", 82 | "encoding": { 83 | "x": {"field": "category", "type": "nominal", "title": "Product Category"}, 84 | "y": {"field": "total_sales", "type": "quantitative", "title": "Total Sales ($)"}, 85 | "color": {"field": "category", "type": "nominal", "legend": null} 86 | }, 87 | "title": "Sales by Product Category" 88 | }, 89 | "explanation": "I chose a bar chart because the query compares sales totals across categorical product groups. The bar chart clearly shows the relative differences between categories.", 90 | "limitations": "This visualization works best with a small number of categories. If there are many categories, consider using a horizontal bar chart with categories sorted by value." 91 | } 92 | ``` 93 | 94 | A time series visualization: 95 | ```json 96 | { 97 | "vegaLiteSpec": { 98 | "$schema": "https://vega.github.io/schema/vega-lite/v6.json", 99 | "data": {"name": "results"}, 100 | "mark": "line", 101 | "encoding": { 102 | "x": {"field": "date", "type": "temporal", "title": "Month"}, 103 | "y": {"field": "revenue", "type": "quantitative", "title": "Monthly Revenue ($)"} 104 | }, 105 | "title": "Monthly Revenue Trend" 106 | }, 107 | "explanation": "I chose a line chart because the query shows revenue values over time, and line charts are ideal for showing trends in temporal data.", 108 | "limitations": "This visualization assumes the time intervals are regular. Irregular intervals might be better visualized with points connected by lines." 109 | } 110 | ``` 111 | 112 | Remember to match your visualization to the user's intent as expressed in the natural language query. The visualization should directly answer the question being asked. -------------------------------------------------------------------------------- /server/prompts/templates/justify_sql.md.jinja2: -------------------------------------------------------------------------------- 1 | You are an expert PostgreSQL database analyzer tasked with evaluating if a given SQL query correctly answers a natural language question based on the database schema. 
2 | 3 | Your task is to: 4 | - Analyze the database schema information provided 5 | - Determine if the SQL query correctly and completely answers the natural language question 6 | - Explain how the SQL query works in relation to the question 7 | - Identify any potential issues, limitations, or edge cases 8 | 9 | Database Information 10 | ```json 11 | {{database_info}} 12 | ``` 13 | 14 | Natural Language Query 15 | "{{nl_query}}" 16 | 17 | SQL Query to Evaluate 18 | ```sql 19 | {{sql_query}} 20 | ``` 21 | 22 | Response Format 23 | Your response must be a JSON object with the following fields: 24 | - "correct": (boolean) true if the SQL query correctly answers the natural language question, false otherwise 25 | - "explanation": (string) a thorough explanation of how the SQL query works and how it answers the natural language question 26 | - "components": (object) an analysis of each major SQL component: 27 | - "select": explanation of what data is being retrieved 28 | - "from_joins": explanation of table relationships being used 29 | - "filters": explanation of filtering conditions 30 | - "aggregations": explanation of any aggregation or grouping 31 | - "sorting": explanation of result ordering 32 | - "issues": (array) any potential issues or limitations with the query 33 | - "improvements": (array) suggested improvements for better performance or clarity 34 | - "confidence": (number) your confidence level from 0.0 to 1.0 that your assessment is correct 35 | 36 | Example response format: 37 | ```json 38 | { 39 | "correct": true, 40 | "explanation": "This query correctly identifies the top 5 customers by total order amount by joining the customers table with orders and order_items, calculating the sum of prices for each customer, and limiting to 5 results ordered by this sum.", 41 | "components": { 42 | "select": "The query selects customer names and the sum of their order totals", 43 | "from_joins": "Joins customers to orders on customer_id, then to order_items on order_id to access line item prices", 44 | "filters": "No filters are applied as the query asks for all customers", 45 | "aggregations": "Uses SUM() with GROUP BY customer_id to calculate total spending per customer", 46 | "sorting": "Orders by the sum of order amounts in descending order to get highest spenders first" 47 | }, 48 | "issues": [ 49 | "Query doesn't handle NULL values in price or quantity columns", 50 | "No date filtering is applied, so this represents all-time spending" 51 | ], 52 | "improvements": [ 53 | "Consider adding a WHERE clause to filter orders by a relevant date range", 54 | "Add COALESCE() to handle potential NULL values in calculations" 55 | ], 56 | "confidence": 0.95 57 | } 58 | ``` 59 | 60 | Evaluation Guidelines 61 | - Trace how the tables and joins in the query connect to provide the requested information 62 | - Verify that all necessary tables to answer the question are included 63 | - Check if appropriate filtering conditions are applied to match the question specifics 64 | - Confirm that the right columns are selected to provide the requested information 65 | - Evaluate if aggregations (SUM, COUNT, AVG, etc.) 
are used correctly when needed 66 | - Assess if the sorting order matches what the question is asking for 67 | - Consider whether the query handles potential NULL values, duplicates, or edge cases 68 | - Determine if the query uses optimal PostgreSQL features for the task -------------------------------------------------------------------------------- /server/prompts/templates/validate_nl.md.jinja2: -------------------------------------------------------------------------------- 1 | You are an expert PostgreSQL database analyst tasked with determining if a user's natural language query can be answered using the available database schema. 2 | 3 | Your task is to: 4 | - Analyze the database schema information provided 5 | - Determine if the natural language query can be answered with the available tables and relationships 6 | - Evaluate the clarity and specificity of the query 7 | - Recommend improvements if needed 8 | 9 | Database Information 10 | ```json 11 | {{database_info}} 12 | ``` 13 | 14 | Natural Language Query 15 | "{{nl_query}}" 16 | 17 | Response Format 18 | Your response must contain ONLY a JSON object with the following fields: 19 | - "answerable": (boolean) true if the query can be answered with this schema, false otherwise 20 | - "reason": (string) explanation of your determination, focusing on schema compatibility 21 | - "missing_info": (string) any information missing from the query that would be needed 22 | - "ambiguities": (array) list of any ambiguous parts of the query that could be interpreted multiple ways 23 | - "suggestion": (string) a specific rephrasing of the query if improvements are needed 24 | - "relevant_tables": (array) list of table names from the schema that would be needed to answer this query 25 | - "confidence": (number) your confidence level from 0.0 to 1.0 that your assessment is correct 26 | 27 | Example response format: 28 | ```json 29 | { 30 | "answerable": true, 31 | "reason": "The query can be answered using the customers and orders tables which contain the necessary information about customer spending.", 32 | "missing_info": "Time period for the calculation is not specified.", 33 | "ambiguities": ["Does 'top' refer to most frequent purchasers or highest total spend?"], 34 | "suggestion": "What are the top 5 customers by total order amount in the past year?", 35 | "relevant_tables": ["customers", "orders", "order_items"], 36 | "confidence": 0.95 37 | } 38 | ``` 39 | 40 | Evaluation Guidelines 41 | - Examine the schema carefully to understand table relationships and available data 42 | - Check if all entities mentioned in the query exist in the schema 43 | - Identify if necessary join relationships exist between relevant tables 44 | - Consider whether necessary columns exist to filter, group, or aggregate as requested 45 | - Look for ambiguities that could lead to multiple interpretations 46 | - Evaluate if the query is too vague or too specific for the available data 47 | - Determine if time-based queries can be satisfied with available date/time fields 48 | - Consider PostgreSQL-specific capabilities when evaluating feasibility 49 | -------------------------------------------------------------------------------- /server/resources/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stuzero/pg-mcp-server/199a81bcefa4020335d33bdfd84d16299aaeaf23/server/resources/__init__.py -------------------------------------------------------------------------------- 
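
How the pieces above fit together: the prompts registered in `server/prompts/natural_language.py` and the resources and tools defined under `server/resources/` and `server/tools/` are consumed over MCP by a client such as the ones in `example-clients/`. The sketch below shows one plausible round trip using the `mcp` Python SDK's SSE client; the `connect` tool name, its `connection_string` argument, and the `conn_id` field in its result are assumptions based on the README (the authoritative signature lives in `server/tools/connection.py`), so adjust them as needed.

```python
# Hypothetical client sketch; not part of the repository. Assumes the server from
# the Dockerfile/.env.example is listening on http://localhost:8000/sse, and that
# the connect tool accepts a "connection_string" argument and returns JSON
# containing a "conn_id" field (see server/tools/connection.py for the real API).
import asyncio
import json

from pydantic import AnyUrl

from mcp import ClientSession
from mcp.client.sse import sse_client


async def main():
    async with sse_client("http://localhost:8000/sse") as (read_stream, write_stream):
        async with ClientSession(read_stream, write_stream) as session:
            await session.initialize()

            # Register a database connection and capture its secure connection ID.
            result = await session.call_tool(
                "connect",
                {"connection_string": "postgresql://user:password@localhost:5432/mydb"},
            )
            conn_id = json.loads(result.content[0].text)["conn_id"]

            # Render the generate_sql prompt for a natural language question.
            prompt = await session.get_prompt(
                "generate_sql",
                {"conn_id": conn_id, "nl_query": "Total sales by month for 2024"},
            )
            print(prompt.messages[0].content.text)

            # Read one of the schema-discovery resources registered below.
            schemas = await session.read_resource(AnyUrl(f"pgmcp://{conn_id}/schemas"))
            print(schemas.contents[0].text)


asyncio.run(main())
```

A client would typically follow this with a `pg_query` tool call to execute the SQL the model returns, and can pass the results on to the `generate_vega` prompt for visualization.
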
/server/resources/data.py: -------------------------------------------------------------------------------- 1 | # server/resources/data.py 2 | from server.config import mcp 3 | from server.logging_config import get_logger 4 | from server.tools.query import execute_query 5 | 6 | logger = get_logger("pg-mcp.resources.data") 7 | 8 | def register_data_resources(): 9 | """Register database data resources with the MCP server.""" 10 | logger.debug("Registering data resources") 11 | 12 | @mcp.resource("pgmcp://{conn_id}/schemas/{schema}/tables/{table}/sample") 13 | async def sample_table_data(conn_id: str, schema: str, table: str): 14 | """Get a sample of data from a specific table.""" 15 | # First, sanitize the schema and table names 16 | sanitize_query = "SELECT quote_ident($1) AS schema_ident, quote_ident($2) AS table_ident" 17 | identifiers = await execute_query(sanitize_query, conn_id, [schema, table]) 18 | 19 | schema_ident = identifiers[0]['schema_ident'] 20 | table_ident = identifiers[0]['table_ident'] 21 | 22 | # Build the sample query with sanitized identifiers 23 | sample_query = f"SELECT * FROM {schema_ident}.{table_ident} LIMIT 10" 24 | return await execute_query(sample_query, conn_id) 25 | 26 | @mcp.resource("pgmcp://{conn_id}/schemas/{schema}/tables/{table}/rowcount") 27 | async def get_table_rowcount(conn_id: str, schema: str, table: str): 28 | """Get the approximate row count for a specific table.""" 29 | # First, sanitize the schema and table names 30 | sanitize_query = "SELECT quote_ident($1) AS schema_ident, quote_ident($2) AS table_ident" 31 | identifiers = await execute_query(sanitize_query, conn_id, [schema, table]) 32 | 33 | schema_ident = identifiers[0]['schema_ident'] 34 | table_ident = identifiers[0]['table_ident'] 35 | 36 | # Get approximate row count for the table (faster than COUNT(*)) 37 | query = f""" 38 | SELECT 39 | reltuples::bigint AS approximate_row_count 40 | FROM pg_class 41 | JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace 42 | WHERE 43 | pg_namespace.nspname = $1 44 | AND pg_class.relname = $2 45 | """ 46 | return await execute_query(query, conn_id, [schema, table]) -------------------------------------------------------------------------------- /server/resources/extensions.py: -------------------------------------------------------------------------------- 1 | # server/resources/extensions.py 2 | import os 3 | import yaml 4 | from server.config import mcp 5 | from server.logging_config import get_logger 6 | from server.tools.query import execute_query 7 | 8 | logger = get_logger("pg-mcp.resources.extensions") 9 | 10 | def get_extension_yaml(extension_name): 11 | """Load and return extension context YAML if it exists.""" 12 | extensions_dir = os.path.join(os.path.dirname(__file__), 'extensions') 13 | file_path = os.path.join(extensions_dir, f"{extension_name}.yaml") 14 | 15 | if os.path.exists(file_path): 16 | try: 17 | with open(file_path, 'r') as f: 18 | return yaml.safe_load(f) 19 | except Exception as e: 20 | logger.error(f"Error loading extension YAML for {extension_name}: {e}") 21 | 22 | return None 23 | 24 | def register_extension_resources(): 25 | """Register database extension resources with the MCP server.""" 26 | logger.debug("Registering extension resources") 27 | 28 | @mcp.resource("pgmcp://{conn_id}/schemas/{schema}/extensions") 29 | async def list_schema_extensions(conn_id: str, schema: str): 30 | """List all extensions installed in a specific schema.""" 31 | query = """ 32 | SELECT 33 | e.extname AS name, 34 | e.extversion AS 
version, 35 | n.nspname AS schema, 36 | e.extrelocatable AS relocatable, 37 | obj_description(e.oid) AS description 38 | FROM 39 | pg_extension e 40 | JOIN 41 | pg_namespace n ON n.oid = e.extnamespace 42 | WHERE 43 | n.nspname = $1 44 | ORDER BY 45 | e.extname 46 | """ 47 | extensions = await execute_query(query, conn_id, [schema]) 48 | 49 | # Enhance with any available YAML context 50 | for ext in extensions: 51 | ext_name = ext.get('name') 52 | yaml_context = get_extension_yaml(ext_name) 53 | if yaml_context: 54 | ext['context_available'] = True 55 | else: 56 | ext['context_available'] = False 57 | 58 | return extensions 59 | 60 | @mcp.resource("pgmcp://{conn_id}/schemas/{schema}/extensions/{extension}") 61 | async def get_extension_details(conn_id: str, schema: str, extension: str): 62 | """Get detailed information about a specific extension in a schema.""" 63 | # Return YAML context if available 64 | yaml_context = get_extension_yaml(extension) 65 | if yaml_context: 66 | return [yaml_context] 67 | 68 | # Return empty string if no YAML context 69 | return [""] -------------------------------------------------------------------------------- /server/resources/extensions/pgvector.yaml: -------------------------------------------------------------------------------- 1 | description: | 2 | pgvector is a PostgreSQL extension for vector similarity search. It provides vector data types 3 | and operators that enable storage and efficient querying of high-dimensional vector embeddings, 4 | commonly used in machine learning applications, semantic search, and recommendation systems. 5 | 6 | data_types: 7 | - name: "vector" 8 | description: "Fixed-length array of float values, representing an embedding vector" 9 | example: "embedding vector(1536)" 10 | notes: "Dimensions are defined in the table schema" 11 | 12 | operators: 13 | - symbol: "<->" 14 | name: "Euclidean distance" 15 | description: "L2 distance between vectors. Lower values indicate more similar vectors." 16 | example: "embedding <-> '[1,2,3]'::vector" 17 | 18 | - symbol: "<=>" 19 | name: "Cosine distance" 20 | description: "1 - cosine similarity. Lower values indicate more similar vectors." 21 | example: "embedding <=> '[1,2,3]'::vector" 22 | 23 | - symbol: "<#>" 24 | name: "Inner product distance" 25 | description: "Negative inner product. Lower values indicate more similar vectors." 
26 | example: "embedding <#> '[1,2,3]'::vector" 27 | notes: "For actual inner product (dot product), use negative of this distance" 28 | 29 | functions: 30 | - name: "vector_dims" 31 | description: "Gets the dimension of a vector" 32 | example: "SELECT vector_dims(embedding) FROM items LIMIT 1" 33 | 34 | - name: "vector_norm" 35 | description: "Gets the L2 (Euclidean) norm of a vector" 36 | example: "SELECT vector_norm(embedding) FROM items LIMIT 1" 37 | 38 | - name: "vector_avg" 39 | description: "Aggregate function to average multiple vectors" 40 | example: "SELECT vector_avg(embedding) FROM items WHERE category = 'electronics'" 41 | 42 | - name: "vector_add" 43 | description: "Add two vectors together" 44 | example: "SELECT vector_add(embedding1, embedding2) FROM item_pairs LIMIT 10" 45 | 46 | - name: "vector_subtract" 47 | description: "Subtract second vector from first" 48 | example: "SELECT vector_subtract(embedding1, embedding2) FROM item_pairs LIMIT 10" 49 | 50 | - name: "cosine_similarity" 51 | description: "Calculate cosine similarity between vectors (not distance)" 52 | example: "SELECT 1 - (embedding <=> '[1,2,3]'::vector) AS similarity FROM items LIMIT 5" 53 | notes: "Computed as 1 minus the cosine distance" 54 | 55 | examples: 56 | - name: "Basic vector similarity search" 57 | query: | 58 | SELECT id, title, embedding <=> '[0.3,0.2,0.1]'::vector AS distance 59 | FROM documents 60 | ORDER BY embedding <=> '[0.3,0.2,0.1]'::vector 61 | LIMIT 5 62 | description: "Find the 5 most similar documents to the query vector using cosine distance" 63 | 64 | - name: "Hybrid search with metadata filtering" 65 | query: | 66 | SELECT id, title, embedding <-> query_embedding AS distance 67 | FROM documents, plainto_tsquery('english', 'database systems') query_text, 68 | '[0.3,0.2,0.1]'::vector query_embedding 69 | WHERE search_text @@ query_text 70 | ORDER BY embedding <-> query_embedding 71 | LIMIT 10 72 | description: "Combine full-text search with vector similarity to find the most relevant results" 73 | 74 | - name: "K-Nearest neighbors search" 75 | query: | 76 | SELECT id, title, embedding <-> '[0.9,0.8,0.7]'::vector AS distance 77 | FROM products 78 | ORDER BY embedding <-> '[0.9,0.8,0.7]'::vector 79 | LIMIT 10 80 | description: "Find the 10 nearest products in vector space to the query embedding" 81 | 82 | - name: "Vector similarity with pagination" 83 | query: | 84 | SELECT id, title, embedding <=> '[0.5,0.5,0.5]'::vector AS distance 85 | FROM documents 86 | ORDER BY embedding <=> '[0.5,0.5,0.5]'::vector 87 | LIMIT 10 OFFSET 20 88 | description: "Get the 3rd page of results (items 21-30) ordered by vector similarity" 89 | 90 | - name: "Filter by distance threshold" 91 | query: | 92 | SELECT id, title, 1 - (embedding <=> '[0.5,0.5,0.5]'::vector) AS similarity 93 | FROM documents 94 | WHERE embedding <=> '[0.5,0.5,0.5]'::vector < 0.2 95 | ORDER BY embedding <=> '[0.5,0.5,0.5]'::vector 96 | description: "Find documents with at least 80% cosine similarity to the query vector" 97 | 98 | - name: "Semantic deduplication with vector similarity" 99 | query: | 100 | SELECT a.id as id1, b.id as id2, 101 | 1 - (a.embedding <=> b.embedding) AS similarity 102 | FROM documents a 103 | JOIN documents b ON a.id < b.id 104 | WHERE a.embedding <=> b.embedding < 0.1 105 | ORDER BY similarity DESC 106 | LIMIT 100 107 | description: "Find document pairs that are semantically similar (>90% cosine similarity)" 108 | 109 | - name: "Multi-vector query (concept combination)" 110 | query: | 111 | SELECT id, 
title, 112 | (embedding <=> '[0.2,0.1,0.5]'::vector) * 0.7 + 113 | (embedding <=> '[0.8,0.3,0.1]'::vector) * 0.3 AS combined_score 114 | FROM documents 115 | ORDER BY combined_score 116 | LIMIT 5 117 | description: "Weighted combination of similarity to multiple concept vectors" 118 | 119 | - name: "Vector operations with subquery" 120 | query: | 121 | SELECT 122 | p.id, 123 | p.name, 124 | p.embedding <=> ( 125 | SELECT vector_avg(embedding) 126 | FROM products 127 | WHERE category = 'electronics' AND rating > 4.5 128 | ) AS distance_to_avg_electronics 129 | FROM products p 130 | WHERE p.category = 'electronics' 131 | ORDER BY distance_to_avg_electronics 132 | LIMIT 10 133 | description: "Find products most similar to the average vector of highly-rated electronics" 134 | 135 | - name: "Group by with vector functions" 136 | query: | 137 | SELECT 138 | category, 139 | vector_avg(embedding) AS category_avg_embedding, 140 | COUNT(*) AS product_count 141 | FROM products 142 | GROUP BY category 143 | HAVING COUNT(*) > 5 144 | description: "Calculate average embedding vector for each product category with more than 5 products" 145 | 146 | - name: "Vector similarity with complex filtering" 147 | query: | 148 | WITH target_embedding AS ( 149 | SELECT embedding 150 | FROM products 151 | WHERE id = 123 152 | ) 153 | SELECT 154 | p.id, 155 | p.name, 156 | p.embedding <-> t.embedding AS distance 157 | FROM products p, target_embedding t 158 | WHERE 159 | p.category = 'furniture' AND 160 | p.price BETWEEN 100 AND 500 AND 161 | p.in_stock = true AND 162 | p.embedding <-> t.embedding < 0.8 163 | ORDER BY distance 164 | LIMIT 20 165 | description: "Find in-stock furniture products in a specific price range most similar to a reference product" 166 | 167 | - name: "Negative example search (find items unlike a reference)" 168 | query: | 169 | SELECT id, name 170 | FROM products 171 | ORDER BY embedding <=> '[0.7,0.2,0.1]'::vector DESC 172 | LIMIT 5 173 | description: "Find products least similar to the reference vector by reversing the sort order" 174 | 175 | best_practices: 176 | - "For optimal performance with vectors, include LIMIT in your ORDER BY to avoid sorting the entire result set" 177 | - "When comparing a query vector to many vectors, use a parameter placeholder rather than hardcoding the vector" 178 | - "Combine vector similarity with additional WHERE clauses for more precise results" 179 | - "Use CTEs (WITH clauses) for complex queries involving vector operations for better readability" 180 | - "When calculating cosine similarity (not distance), use the formula: 1 - (vector1 <=> vector2)" 181 | - "For pagination with vector similarity, ensure you're using a consistent ordering with LIMIT and OFFSET" 182 | - "In GROUP BY operations with vector functions, be aware of memory usage with large result sets" 183 | - "For complex vector operations involving multiple tables, try to keep vector calculations in the ORDER BY clause indexed" 184 | - "When using vector distance in a WHERE clause (like WHERE embedding <-> query < 0.5), be aware this may not use the vector index efficiently" 185 | -------------------------------------------------------------------------------- /server/resources/extensions/postgis.yaml: -------------------------------------------------------------------------------- 1 | description: | 2 | PostGIS is a spatial database extension for PostgreSQL that adds support for geographic objects, 3 | allowing location queries to be run in SQL. 
It provides support for geographic objects 4 | and enables location queries on your PostgreSQL database. 5 | 6 | data_types: 7 | - name: "geometry" 8 | description: "Planar spatial data type for projections and Cartesian calculations" 9 | example: "geom geometry(POINT, 4326)" 10 | notes: "The number after POINT is the SRID (spatial reference ID)" 11 | 12 | - name: "geography" 13 | description: "Geodetic spatial data type for calculations on a spheroid (Earth)" 14 | example: "location geography(POINT, 4326)" 15 | notes: "More accurate for long distances but slower than geometry" 16 | 17 | - name: "box2d" 18 | description: "2D bounding box" 19 | example: "SELECT box2d(geom) FROM spatial_table" 20 | 21 | - name: "box3d" 22 | description: "3D bounding box" 23 | example: "SELECT box3d(geom) FROM spatial_table" 24 | 25 | functions: 26 | constructors: 27 | - name: "ST_GeomFromText" 28 | description: "Creates a geometry instance from Well-Known Text (WKT)" 29 | example: "SELECT ST_GeomFromText('POINT(-71.064 42.28)', 4326)" 30 | 31 | - name: "ST_MakePoint" 32 | description: "Creates a 2D, 3DZ or 4D point geometry" 33 | example: "SELECT ST_MakePoint(longitude, latitude)" 34 | 35 | - name: "ST_Point" 36 | description: "Creates a point geometry with the given coordinate values" 37 | example: "SELECT ST_Point(longitude, latitude)" 38 | 39 | - name: "ST_Buffer" 40 | description: "Creates a buffer area around a geometry" 41 | example: "SELECT ST_Buffer(geom, 1000) FROM locations" 42 | notes: "Distance in meters for geography type, in coordinate system units for geometry" 43 | 44 | accessors: 45 | - name: "ST_X" 46 | description: "Returns the X coordinate of a point" 47 | example: "SELECT ST_X(geom) FROM locations" 48 | 49 | - name: "ST_Y" 50 | description: "Returns the Y coordinate of a point" 51 | example: "SELECT ST_Y(geom) FROM locations" 52 | 53 | - name: "ST_AsText" 54 | description: "Returns the Well-Known Text (WKT) representation" 55 | example: "SELECT ST_AsText(geom) FROM locations" 56 | 57 | - name: "ST_AsGeoJSON" 58 | description: "Returns the GeoJSON representation of the geometry" 59 | example: "SELECT ST_AsGeoJSON(geom) FROM locations" 60 | 61 | - name: "ST_SRID" 62 | description: "Returns the spatial reference identifier" 63 | example: "SELECT ST_SRID(geom) FROM locations" 64 | 65 | operators: 66 | - name: "ST_Distance" 67 | description: "Returns the distance between two geometries" 68 | example: "SELECT ST_Distance(geom_a, geom_b) FROM spatial_data" 69 | notes: "Units depend on the projection; use geography type for meters" 70 | 71 | - name: "ST_DWithin" 72 | description: "Returns true if the geometries are within the specified distance" 73 | example: "SELECT * FROM locations WHERE ST_DWithin(geom, ST_MakePoint(-71.1, 42.3)::geography, 1000)" 74 | notes: "Much faster than ST_Distance < n for distance queries" 75 | 76 | - name: "ST_Contains" 77 | description: "Returns true if geometry A contains geometry B" 78 | example: "SELECT * FROM regions r, points p WHERE ST_Contains(r.geom, p.geom)" 79 | 80 | - name: "ST_Intersects" 81 | description: "Returns true if the geometries spatially intersect" 82 | example: "SELECT * FROM roads r, rivers v WHERE ST_Intersects(r.geom, v.geom)" 83 | 84 | - name: "ST_Within" 85 | description: "Returns true if geometry A is completely inside geometry B" 86 | example: "SELECT * FROM cities c, states s WHERE ST_Within(c.geom, s.geom)" 87 | 88 | - name: "ST_Area" 89 | description: "Returns the area of a polygon geometry" 90 | example: "SELECT ST_Area(geom) FROM 
polygons" 91 | notes: "Returns area in square units of the SRID (or square meters for geography)" 92 | 93 | examples: 94 | - name: "Find points within a radius" 95 | query: | 96 | SELECT 97 | name, 98 | ST_AsText(location) AS wkt_geom, 99 | ST_Distance(location, ST_MakePoint(-71.064, 42.28)::geography) AS distance_meters 100 | FROM 101 | places 102 | WHERE 103 | ST_DWithin( 104 | location, 105 | ST_MakePoint(-71.064, 42.28)::geography, 106 | 1000 107 | ) 108 | ORDER BY 109 | distance_meters 110 | description: "Find all places within 1000 meters of the specified point and sort by distance" 111 | 112 | - name: "Calculate distance between two points" 113 | query: | 114 | SELECT 115 | ST_Distance( 116 | ST_MakePoint(-73.935, 40.730)::geography, 117 | ST_MakePoint(-74.006, 40.712)::geography 118 | ) AS distance_meters 119 | description: "Calculate the distance in meters between two GPS coordinates" 120 | 121 | - name: "Spatial join with a polygon" 122 | query: | 123 | SELECT 124 | c.name, 125 | c.population, 126 | ST_Distance(c.geom, r.geom) AS distance_to_boundary 127 | FROM 128 | cities c 129 | JOIN 130 | regions r ON ST_Within(c.geom, r.geom) 131 | WHERE 132 | r.name = 'California' 133 | ORDER BY 134 | c.population DESC 135 | description: "Find all cities within the California region boundary, ordered by population" 136 | 137 | - name: "Find nearest neighbors" 138 | query: | 139 | SELECT 140 | h.name AS hospital_name, 141 | ST_Distance(h.geom, i.geom) AS distance 142 | FROM 143 | hospitals h, 144 | (SELECT geom FROM incidents WHERE id = 123) i 145 | ORDER BY 146 | h.geom <-> i.geom 147 | LIMIT 5 148 | description: "Find the 5 closest hospitals to a specific incident using the <-> distance operator" 149 | 150 | - name: "Transform coordinates between projections" 151 | query: | 152 | SELECT 153 | ST_AsText( 154 | ST_Transform( 155 | ST_SetSRID(ST_MakePoint(-71.064, 42.28), 4326), 156 | 2249 157 | ) 158 | ) AS transformed_point 159 | description: "Transform WGS84 coordinates to Massachusetts state plane" 160 | 161 | - name: "Find intersection points" 162 | query: | 163 | SELECT 164 | r1.name AS road1, 165 | r2.name AS road2, 166 | ST_AsText(ST_Intersection(r1.geom, r2.geom)) AS intersection_point 167 | FROM 168 | roads r1 169 | JOIN 170 | roads r2 ON r1.id < r2.id 171 | WHERE 172 | ST_Intersects(r1.geom, r2.geom) 173 | LIMIT 10 174 | description: "Find intersection points between different roads" 175 | 176 | - name: "Bounding box query" 177 | query: | 178 | SELECT 179 | name, 180 | ST_AsText(geom) 181 | FROM 182 | points_of_interest 183 | WHERE 184 | geom && ST_MakeEnvelope( 185 | -74.01, 40.70, -- Lower left corner (longitude, latitude) 186 | -73.97, 40.73, -- Upper right corner (longitude, latitude) 187 | 4326 -- SRID 188 | ) 189 | description: "Find all points of interest within a geographic bounding box (very efficient)" 190 | 191 | - name: "Aggregation with spatial data" 192 | query: | 193 | SELECT 194 | county_name, 195 | COUNT(*) AS num_businesses, 196 | ST_Area(county_geom) / 1000000 AS area_sq_km, 197 | COUNT(*) / (ST_Area(county_geom) / 1000000) AS business_density 198 | FROM 199 | businesses b 200 | JOIN 201 | counties c ON ST_Within(b.geom, c.geom) 202 | WHERE 203 | b.type = 'restaurant' 204 | GROUP BY 205 | county_name, county_geom 206 | ORDER BY 207 | business_density DESC 208 | description: "Calculate restaurant density per square kilometer for each county" 209 | 210 | - name: "Complex spatial analysis with CTE" 211 | query: | 212 | WITH 213 | buffer_zones AS ( 214 | SELECT 215 | 
s.id, 216 | s.name, 217 | ST_Buffer(s.geom::geography, 5000)::geometry AS buffer_geom 218 | FROM 219 | stores s 220 | WHERE 221 | s.region = 'Northeast' 222 | ), 223 | population_in_zones AS ( 224 | SELECT 225 | b.id AS store_id, 226 | b.name AS store_name, 227 | SUM(c.population) AS total_population 228 | FROM 229 | buffer_zones b 230 | JOIN 231 | census_blocks c ON ST_Intersects(c.geom, b.buffer_geom) 232 | GROUP BY 233 | b.id, b.name 234 | ) 235 | SELECT 236 | store_id, 237 | store_name, 238 | total_population, 239 | RANK() OVER (ORDER BY total_population DESC) AS population_rank 240 | FROM 241 | population_in_zones 242 | ORDER BY 243 | total_population DESC 244 | description: "Find total population within 5km of each Northeast store and rank stores by population coverage" 245 | 246 | best_practices: 247 | - "Use geography type (not geometry) when working with GPS coordinates and Earth distances in meters" 248 | - "For better performance on distance queries, use ST_DWithin instead of ST_Distance < value" 249 | - "The <-> operator (e.g., geom1 <-> geom2) is a distance operator that works with spatial indexes" 250 | - "For large datasets, ensure spatial indexes are available on geometry/geography columns" 251 | - "Use the && operator for bounding box queries, which is very efficient with spatial indexes" 252 | - "For points in WGS84, remember that longitude comes before latitude in all PostGIS functions" 253 | - "When working with different projections, make sure to use ST_Transform to convert between them" 254 | - "For complex spatial analysis, use Common Table Expressions (WITH) to make queries more readable" 255 | - "ST_Intersects is usually preferred over ST_Contains when checking for spatial relationships" 256 | - "For high-precision Earth distance calculations, cast to geography type: geom::geography" 257 | -------------------------------------------------------------------------------- /server/resources/schema.py: -------------------------------------------------------------------------------- 1 | # server/resources/schema.py 2 | import importlib.resources 3 | from server.config import mcp 4 | from server.logging_config import get_logger 5 | from server.tools.query import execute_query 6 | 7 | logger = get_logger("pg-mcp.resources.schemas") 8 | 9 | def load_sql_file(filename): 10 | """Load SQL from a file using importlib.resources.""" 11 | return importlib.resources.read_text('server.resources.sql', filename) 12 | 13 | def register_schema_resources(): 14 | """Register database schema resources with the MCP server.""" 15 | logger.debug("Registering schema resources") 16 | 17 | @mcp.resource("pgmcp://{conn_id}/", mime_type="application/json") 18 | async def get_database(conn_id: str): 19 | """ 20 | Get the complete database information including all schemas, tables, columns, and constraints. 21 | Returns a comprehensive JSON structure with the entire database structure. 
22 | """ 23 | query = load_sql_file("get_database.sql") 24 | result = await execute_query(query, conn_id) 25 | if result and len(result) > 0: 26 | return result[0]['db_structure'] 27 | return {"schemas": []} 28 | 29 | @mcp.resource("pgmcp://{conn_id}/schemas", mime_type="application/json") 30 | async def list_schemas(conn_id: str): 31 | """Get all non-system schemas in the database.""" 32 | query = load_sql_file("list_schemas.sql") 33 | result = await execute_query(query, conn_id) 34 | if result and len(result) > 0: 35 | return result[0]['schema_list'] 36 | return {"schemas": []} 37 | 38 | @mcp.resource("pgmcp://{conn_id}/schemas/{schema}", mime_type="application/json") 39 | async def get_schema(conn_id: str, schema: str): 40 | """Get information about a particular schemas in the database. Also provides extension information (if any)""" 41 | query = load_sql_file("get_schema.sql") 42 | result = await execute_query(query, conn_id, [schema]) 43 | if result and len(result) > 0: 44 | return result[0]['schema_info'] 45 | return {"schema": []} 46 | 47 | @mcp.resource("pgmcp://{conn_id}/schemas/{schema}/tables/{table}", mime_type="application/json") 48 | async def get_schema_table(conn_id: str, schema: str, table: str): 49 | """ 50 | Get comprehensive information about a specific table in a schema. 51 | This returns detailed information including columns, constraints, indexes, and statistics. 52 | """ 53 | query = load_sql_file("get_schema_table.sql") 54 | result = await execute_query(query, conn_id, [schema, table]) 55 | if result and len(result) > 0: 56 | return result[0]['table_details'] 57 | return {"table": {}} 58 | 59 | @mcp.resource("pgmcp://{conn_id}/schemas/{schema}/materialized_views/{view}", mime_type="application/json") 60 | async def get_schema_view(conn_id: str, schema: str, view: str): 61 | """ 62 | Get comprehensive information about a specific materialized view in a schema. 63 | This returns detailed information including the view definition SQL, columns, 64 | indexes, and statistics. 
65 | """ 66 | query = load_sql_file("get_schema_view.sql") 67 | result = await execute_query(query, conn_id, [schema, view]) 68 | if result and len(result) > 0: 69 | return result[0]['view_details'] 70 | return {"materialized_view": {}} -------------------------------------------------------------------------------- /server/resources/sql/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stuzero/pg-mcp-server/199a81bcefa4020335d33bdfd84d16299aaeaf23/server/resources/sql/__init__.py -------------------------------------------------------------------------------- /server/resources/sql/get_database.sql: -------------------------------------------------------------------------------- 1 | -- server/resources/sql/db_get_database.sql 2 | 3 | -- Comprehensive database structure query 4 | -- Retrieve complete database schema information as JSON 5 | 6 | -- Get all non-system schemas 7 | WITH schemas AS ( 8 | SELECT 9 | n.nspname AS schema_name, 10 | obj_description(n.oid) AS description 11 | FROM 12 | pg_namespace n 13 | WHERE 14 | n.nspname NOT IN ('pg_catalog', 'information_schema', 'pg_toast') 15 | AND n.nspname NOT LIKE 'pg_%' 16 | ), 17 | 18 | -- Get all tables for each schema 19 | tables AS ( 20 | SELECT 21 | s.schema_name, 22 | t.relname AS table_name, 23 | obj_description(t.oid) AS description, 24 | pg_stat_get_tuples_inserted(t.oid) AS row_count 25 | FROM 26 | schemas s 27 | JOIN 28 | pg_class t ON t.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = s.schema_name) 29 | WHERE 30 | t.relkind = 'r' -- 'r' = regular table 31 | ), 32 | 33 | -- Get all columns for all tables 34 | columns AS ( 35 | SELECT 36 | t.schema_name, 37 | t.table_name, 38 | a.attname AS column_name, 39 | pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type, 40 | NOT a.attnotnull AS is_nullable, 41 | (SELECT pg_catalog.pg_get_expr(adbin, adrelid) FROM pg_catalog.pg_attrdef d 42 | WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef) AS column_default, 43 | col_description(a.attrelid, a.attnum) AS description, 44 | a.attnum AS ordinal_position 45 | FROM 46 | tables t 47 | JOIN 48 | pg_catalog.pg_class c ON c.relname = t.table_name 49 | AND c.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = t.schema_name) 50 | JOIN 51 | pg_catalog.pg_attribute a ON a.attrelid = c.oid 52 | WHERE 53 | a.attnum > 0 -- Skip system columns 54 | AND NOT a.attisdropped -- Skip dropped columns 55 | ), 56 | 57 | -- Get all primary and unique constraints 58 | key_constraints AS ( 59 | SELECT 60 | t.schema_name, 61 | t.table_name, 62 | con.conname AS constraint_name, 63 | con.contype AS constraint_type, 64 | CASE 65 | WHEN con.contype = 'p' THEN 'PRIMARY KEY' 66 | WHEN con.contype = 'u' THEN 'UNIQUE' 67 | ELSE 'OTHER' 68 | END AS constraint_type_desc, 69 | array_agg(a.attname ORDER BY array_position(con.conkey, a.attnum)) AS column_names 70 | FROM 71 | tables t 72 | JOIN 73 | pg_constraint con ON con.conrelid = ( 74 | SELECT oid FROM pg_class 75 | WHERE relname = t.table_name 76 | AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = t.schema_name) 77 | ) 78 | JOIN 79 | pg_attribute a ON a.attrelid = con.conrelid AND a.attnum = ANY(con.conkey) 80 | WHERE 81 | con.contype IN ('p', 'u') -- 'p' = primary key, 'u' = unique 82 | GROUP BY 83 | t.schema_name, t.table_name, con.conname, con.contype 84 | ), 85 | 86 | -- Get all foreign key constraints 87 | foreign_keys AS ( 88 | SELECT 89 | t.schema_name, 90 | t.table_name, 91 | 
con.conname AS constraint_name, 92 | 'f' AS constraint_type, 93 | 'FOREIGN KEY' AS constraint_type_desc, 94 | array_agg(a.attname ORDER BY array_position(con.conkey, a.attnum)) AS column_names, 95 | nr.nspname AS referenced_schema, 96 | ref_table.relname AS referenced_table, 97 | array_agg(ref_col.attname ORDER BY array_position(con.confkey, ref_col.attnum)) AS referenced_columns 98 | FROM 99 | tables t 100 | JOIN 101 | pg_constraint con ON con.conrelid = ( 102 | SELECT oid FROM pg_class 103 | WHERE relname = t.table_name 104 | AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = t.schema_name) 105 | ) 106 | JOIN 107 | pg_attribute a ON a.attrelid = con.conrelid AND a.attnum = ANY(con.conkey) 108 | JOIN 109 | pg_class ref_table ON ref_table.oid = con.confrelid 110 | JOIN 111 | pg_namespace nr ON nr.oid = ref_table.relnamespace 112 | JOIN 113 | pg_attribute ref_col ON ref_col.attrelid = con.confrelid AND ref_col.attnum = ANY(con.confkey) 114 | WHERE 115 | con.contype = 'f' -- 'f' = foreign key 116 | GROUP BY 117 | t.schema_name, t.table_name, con.conname, nr.nspname, ref_table.relname 118 | ), 119 | 120 | -- Get all check constraints 121 | check_constraints AS ( 122 | SELECT 123 | t.schema_name, 124 | t.table_name, 125 | con.conname AS constraint_name, 126 | 'c' AS constraint_type, 127 | 'CHECK' AS constraint_type_desc, 128 | pg_get_constraintdef(con.oid) AS definition 129 | FROM 130 | tables t 131 | JOIN 132 | pg_constraint con ON con.conrelid = ( 133 | SELECT oid FROM pg_class 134 | WHERE relname = t.table_name 135 | AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = t.schema_name) 136 | ) 137 | WHERE 138 | con.contype = 'c' -- 'c' = check constraint 139 | ), 140 | 141 | -- Get all indexes 142 | indexes AS ( 143 | SELECT 144 | t.schema_name, 145 | t.table_name, 146 | i.relname AS index_name, 147 | am.amname AS index_type, 148 | ix.indisunique AS is_unique, 149 | ix.indisprimary AS is_primary, 150 | array_agg(a.attname ORDER BY array_position(ix.indkey::int[], a.attnum)) AS column_names 151 | FROM 152 | tables t 153 | JOIN 154 | pg_class tbl ON tbl.relname = t.table_name 155 | AND tbl.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = t.schema_name) 156 | JOIN 157 | pg_index ix ON ix.indrelid = tbl.oid 158 | JOIN 159 | pg_class i ON i.oid = ix.indexrelid 160 | JOIN 161 | pg_am am ON am.oid = i.relam 162 | JOIN 163 | pg_attribute a ON a.attrelid = tbl.oid AND a.attnum = ANY(ix.indkey::int[]) 164 | GROUP BY 165 | t.schema_name, t.table_name, i.relname, am.amname, ix.indisunique, ix.indisprimary 166 | ) 167 | 168 | -- Main query to return all data in JSON format 169 | SELECT 170 | jsonb_build_object( 171 | 'schemas', 172 | ( 173 | SELECT jsonb_agg( 174 | jsonb_build_object( 175 | 'name', s.schema_name, 176 | 'description', s.description, 177 | 'tables', ( 178 | SELECT jsonb_agg( 179 | jsonb_build_object( 180 | 'name', t.table_name, 181 | 'description', t.description, 182 | 'row_count', t.row_count, 183 | 'columns', ( 184 | SELECT jsonb_agg( 185 | jsonb_build_object( 186 | 'name', c.column_name, 187 | 'type', c.data_type, 188 | 'nullable', c.is_nullable, 189 | 'default', c.column_default, 190 | 'description', c.description, 191 | 'constraints', ( 192 | SELECT jsonb_agg( 193 | constraint_type_desc 194 | ) 195 | FROM ( 196 | SELECT kc.constraint_type_desc 197 | FROM key_constraints kc 198 | WHERE kc.schema_name = t.schema_name 199 | AND kc.table_name = t.table_name 200 | AND c.column_name = ANY(kc.column_names) 201 | UNION ALL 202 | SELECT 'FOREIGN KEY' 203 | 
FROM foreign_keys fk 204 | WHERE fk.schema_name = t.schema_name 205 | AND fk.table_name = t.table_name 206 | AND c.column_name = ANY(fk.column_names) 207 | ) constraints 208 | ) 209 | ) ORDER BY c.ordinal_position 210 | ) 211 | FROM columns c 212 | WHERE c.schema_name = t.schema_name 213 | AND c.table_name = t.table_name 214 | ), 215 | 'foreign_keys', ( 216 | SELECT jsonb_agg( 217 | jsonb_build_object( 218 | 'name', fk.constraint_name, 219 | 'columns', fk.column_names, 220 | 'referenced_schema', fk.referenced_schema, 221 | 'referenced_table', fk.referenced_table, 222 | 'referenced_columns', fk.referenced_columns 223 | ) 224 | ) 225 | FROM foreign_keys fk 226 | WHERE fk.schema_name = t.schema_name 227 | AND fk.table_name = t.table_name 228 | ), 229 | 'indexes', ( 230 | SELECT jsonb_agg( 231 | jsonb_build_object( 232 | 'name', idx.index_name, 233 | 'type', idx.index_type, 234 | 'is_unique', idx.is_unique, 235 | 'is_primary', idx.is_primary, 236 | 'columns', idx.column_names 237 | ) 238 | ) 239 | FROM indexes idx 240 | WHERE idx.schema_name = t.schema_name 241 | AND idx.table_name = t.table_name 242 | ), 243 | 'check_constraints', ( 244 | SELECT jsonb_agg( 245 | jsonb_build_object( 246 | 'name', cc.constraint_name, 247 | 'definition', cc.definition 248 | ) 249 | ) 250 | FROM check_constraints cc 251 | WHERE cc.schema_name = t.schema_name 252 | AND cc.table_name = t.table_name 253 | ) 254 | ) ORDER BY t.table_name 255 | ) 256 | FROM tables t 257 | WHERE t.schema_name = s.schema_name 258 | ) 259 | ) ORDER BY s.schema_name 260 | ) 261 | FROM schemas s 262 | ) 263 | ) AS db_structure; -------------------------------------------------------------------------------- /server/resources/sql/get_schema.sql: -------------------------------------------------------------------------------- 1 | -- server/resources/sql/get_schema.sql 2 | -- Get top-level information about a specific schema including basic schema metadata, 3 | -- extensions installed in the schema, table information, and materialized views 4 | -- Returns a simplified JSON object with schema information 5 | 6 | WITH 7 | -- Get schema information 8 | schema_info AS ( 9 | SELECT 10 | n.nspname AS schema_name, 11 | obj_description(n.oid) AS description 12 | FROM 13 | pg_namespace n 14 | WHERE 15 | n.nspname = $1 16 | ), 17 | 18 | -- Get extensions installed in this schema 19 | extensions AS ( 20 | SELECT 21 | e.extname AS name, 22 | e.extversion AS version, 23 | obj_description(e.oid) AS description 24 | FROM 25 | pg_extension e 26 | JOIN 27 | pg_namespace n ON n.oid = e.extnamespace 28 | WHERE 29 | n.nspname = $1 30 | ORDER BY 31 | e.extname 32 | ), 33 | 34 | -- Get all tables in this schema with basic information 35 | tables AS ( 36 | SELECT 37 | t.relname AS table_name, 38 | obj_description(t.oid) AS description, 39 | pg_stat_get_tuples_inserted(t.oid) AS row_count, 40 | pg_total_relation_size(t.oid) AS total_size_bytes 41 | FROM 42 | pg_class t 43 | JOIN 44 | pg_namespace n ON t.relnamespace = n.oid 45 | WHERE 46 | n.nspname = $1 47 | AND t.relkind = 'r' -- 'r' = regular table 48 | ORDER BY 49 | t.relname 50 | ), 51 | 52 | -- Get all materialized views in this schema 53 | materialized_views AS ( 54 | SELECT 55 | m.relname AS view_name, 56 | obj_description(m.oid) AS description, 57 | pg_stat_get_tuples_inserted(m.oid) AS row_count, 58 | pg_total_relation_size(m.oid) AS total_size_bytes 59 | FROM 60 | pg_class m 61 | JOIN 62 | pg_namespace n ON m.relnamespace = n.oid 63 | WHERE 64 | n.nspname = $1 65 | AND m.relkind = 'm' -- 'm' = 
materialized view 66 | ORDER BY 67 | m.relname 68 | ) 69 | 70 | -- Main query to return all data in JSON format 71 | SELECT jsonb_build_object( 72 | 'schema_info', 73 | jsonb_build_object( 74 | 'name', (SELECT schema_name FROM schema_info), 75 | 'description', (SELECT description FROM schema_info), 76 | 'extensions', ( 77 | SELECT COALESCE( 78 | jsonb_agg( 79 | jsonb_build_object( 80 | 'name', e.name, 81 | 'version', e.version, 82 | 'description', e.description 83 | ) 84 | ), 85 | '[]'::jsonb 86 | ) 87 | FROM extensions e 88 | ), 89 | 'tables', ( 90 | SELECT COALESCE( 91 | jsonb_agg( 92 | jsonb_build_object( 93 | 'name', t.table_name, 94 | 'description', t.description, 95 | 'row_count', t.row_count, 96 | 'size_bytes', t.total_size_bytes 97 | ) ORDER BY t.table_name 98 | ), 99 | '[]'::jsonb 100 | ) 101 | FROM tables t 102 | ), 103 | 'materialized_views', ( 104 | SELECT COALESCE( 105 | jsonb_agg( 106 | jsonb_build_object( 107 | 'name', mv.view_name, 108 | 'description', mv.description, 109 | 'row_count', mv.row_count, 110 | 'size_bytes', mv.total_size_bytes 111 | ) ORDER BY mv.view_name 112 | ), 113 | '[]'::jsonb 114 | ) 115 | FROM materialized_views mv 116 | ) 117 | ) 118 | ) AS schema_info; -------------------------------------------------------------------------------- /server/resources/sql/get_schema_table.sql: -------------------------------------------------------------------------------- 1 | -- server/resources/sql/get_schema_table.sql 2 | -- Comprehensive query to get all details for a specific table 3 | -- Returns a JSON structure with columns, constraints, indexes, and statistics 4 | 5 | WITH 6 | -- Get table information 7 | table_info AS ( 8 | SELECT 9 | t.relname AS table_name, 10 | obj_description(t.oid) AS description, 11 | pg_stat_get_tuples_inserted(t.oid) AS row_count, 12 | pg_total_relation_size(t.oid) AS total_size_bytes, 13 | pg_table_size(t.oid) AS table_size_bytes, 14 | pg_indexes_size(t.oid) AS indexes_size_bytes, 15 | t.relkind AS kind 16 | FROM 17 | pg_class t 18 | JOIN 19 | pg_namespace n ON t.relnamespace = n.oid 20 | WHERE 21 | n.nspname = $1 22 | AND t.relname = $2 23 | AND t.relkind = 'r' -- 'r' = regular table 24 | ), 25 | 26 | -- Get all columns for this table 27 | columns AS ( 28 | SELECT 29 | a.attname AS column_name, 30 | pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type, 31 | NOT a.attnotnull AS is_nullable, 32 | (SELECT pg_catalog.pg_get_expr(adbin, adrelid) FROM pg_catalog.pg_attrdef d 33 | WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef) AS column_default, 34 | col_description(a.attrelid, a.attnum) AS description, 35 | a.attnum AS ordinal_position, 36 | a.attidentity IN ('a', 'd') AS is_identity, 37 | CASE 38 | WHEN a.attidentity = 'a' THEN 'ALWAYS' 39 | WHEN a.attidentity = 'd' THEN 'BY DEFAULT' 40 | ELSE NULL 41 | END AS identity_generation, 42 | a.atthasdef AS has_default, 43 | a.attisdropped AS is_dropped, 44 | a.attstorage AS storage_type, 45 | CASE 46 | WHEN a.attstorage = 'p' THEN 'plain' 47 | WHEN a.attstorage = 'e' THEN 'external' 48 | WHEN a.attstorage = 'm' THEN 'main' 49 | WHEN a.attstorage = 'x' THEN 'extended' 50 | ELSE a.attstorage::text 51 | END AS storage_type_desc 52 | FROM 53 | pg_catalog.pg_attribute a 54 | JOIN 55 | pg_catalog.pg_class c ON c.oid = a.attrelid 56 | JOIN 57 | pg_catalog.pg_namespace n ON n.oid = c.relnamespace 58 | WHERE 59 | n.nspname = $1 60 | AND c.relname = $2 61 | AND a.attnum > 0 -- Skip system columns 62 | AND NOT a.attisdropped -- Skip dropped columns 63 | ORDER BY 64 | a.attnum 
65 | ), 66 | 67 | -- Get all primary and unique constraints 68 | key_constraints AS ( 69 | SELECT 70 | con.conname AS constraint_name, 71 | con.contype AS constraint_type, 72 | CASE 73 | WHEN con.contype = 'p' THEN 'PRIMARY KEY' 74 | WHEN con.contype = 'u' THEN 'UNIQUE' 75 | ELSE 'OTHER' 76 | END AS constraint_type_desc, 77 | obj_description(con.oid) AS description, 78 | pg_get_constraintdef(con.oid) AS definition, 79 | array_agg(a.attname ORDER BY array_position(con.conkey, a.attnum)) AS column_names 80 | FROM 81 | pg_constraint con 82 | JOIN 83 | pg_namespace n ON n.oid = con.connamespace 84 | JOIN 85 | pg_class t ON t.oid = con.conrelid 86 | JOIN 87 | pg_attribute a ON a.attrelid = con.conrelid AND a.attnum = ANY(con.conkey) 88 | WHERE 89 | n.nspname = $1 90 | AND t.relname = $2 91 | AND con.contype IN ('p', 'u') -- 'p' = primary key, 'u' = unique 92 | GROUP BY 93 | con.conname, con.contype, con.oid 94 | ORDER BY 95 | con.contype, con.conname 96 | ), 97 | 98 | -- Get all foreign key constraints 99 | foreign_keys AS ( 100 | SELECT 101 | con.conname AS constraint_name, 102 | 'f' AS constraint_type, 103 | 'FOREIGN KEY' AS constraint_type_desc, 104 | obj_description(con.oid) AS description, 105 | pg_get_constraintdef(con.oid) AS definition, 106 | array_agg(a.attname ORDER BY array_position(con.conkey, a.attnum)) AS column_names, 107 | nr.nspname AS referenced_schema, 108 | ref_table.relname AS referenced_table, 109 | array_agg(ref_col.attname ORDER BY array_position(con.confkey, ref_col.attnum)) AS referenced_columns, 110 | CASE con.confdeltype 111 | WHEN 'a' THEN 'NO ACTION' 112 | WHEN 'r' THEN 'RESTRICT' 113 | WHEN 'c' THEN 'CASCADE' 114 | WHEN 'n' THEN 'SET NULL' 115 | WHEN 'd' THEN 'SET DEFAULT' 116 | ELSE NULL 117 | END AS delete_rule, 118 | CASE con.confupdtype 119 | WHEN 'a' THEN 'NO ACTION' 120 | WHEN 'r' THEN 'RESTRICT' 121 | WHEN 'c' THEN 'CASCADE' 122 | WHEN 'n' THEN 'SET NULL' 123 | WHEN 'd' THEN 'SET DEFAULT' 124 | ELSE NULL 125 | END AS update_rule 126 | FROM 127 | pg_constraint con 128 | JOIN 129 | pg_namespace n ON n.oid = con.connamespace 130 | JOIN 131 | pg_class t ON t.oid = con.conrelid 132 | JOIN 133 | pg_attribute a ON a.attrelid = con.conrelid AND a.attnum = ANY(con.conkey) 134 | JOIN 135 | pg_class ref_table ON ref_table.oid = con.confrelid 136 | JOIN 137 | pg_namespace nr ON nr.oid = ref_table.relnamespace 138 | JOIN 139 | pg_attribute ref_col ON ref_col.attrelid = con.confrelid AND ref_col.attnum = ANY(con.confkey) 140 | WHERE 141 | n.nspname = $1 142 | AND t.relname = $2 143 | AND con.contype = 'f' -- 'f' = foreign key 144 | GROUP BY 145 | con.conname, con.contype, con.oid, nr.nspname, ref_table.relname, con.confdeltype, con.confupdtype 146 | ORDER BY 147 | con.conname 148 | ), 149 | 150 | -- Get check constraints 151 | check_constraints AS ( 152 | SELECT 153 | con.conname AS constraint_name, 154 | 'c' AS constraint_type, 155 | 'CHECK' AS constraint_type_desc, 156 | obj_description(con.oid) AS description, 157 | pg_get_constraintdef(con.oid) AS definition 158 | FROM 159 | pg_constraint con 160 | JOIN 161 | pg_namespace n ON n.oid = con.connamespace 162 | JOIN 163 | pg_class t ON t.oid = con.conrelid 164 | WHERE 165 | n.nspname = $1 166 | AND t.relname = $2 167 | AND con.contype = 'c' -- 'c' = check constraint 168 | ORDER BY 169 | con.conname 170 | ), 171 | 172 | -- Get all indexes 173 | indexes AS ( 174 | SELECT 175 | i.relname AS index_name, 176 | pg_get_indexdef(i.oid) AS index_definition, 177 | obj_description(i.oid) AS description, 178 | am.amname AS 
index_type, 179 | ix.indisunique AS is_unique, 180 | ix.indisprimary AS is_primary, 181 | ix.indisexclusion AS is_exclusion, 182 | ix.indimmediate AS is_immediate, 183 | ix.indisclustered AS is_clustered, 184 | ix.indisvalid AS is_valid, 185 | i.relpages AS pages, 186 | i.reltuples AS rows, 187 | array_agg(a.attname ORDER BY array_position(ix.indkey::int[], a.attnum)) AS column_names, 188 | array_agg(pg_get_indexdef(i.oid, k.i::int, false) ORDER BY k.i) AS column_expressions 189 | FROM 190 | pg_index ix 191 | JOIN 192 | pg_class i ON i.oid = ix.indexrelid 193 | JOIN 194 | pg_class t ON t.oid = ix.indrelid 195 | JOIN 196 | pg_namespace n ON n.oid = t.relnamespace 197 | JOIN 198 | pg_am am ON i.relam = am.oid 199 | LEFT JOIN 200 | LATERAL unnest(ix.indkey::int[]) WITH ORDINALITY AS k(attnum, i) ON TRUE 201 | LEFT JOIN 202 | pg_attribute a ON a.attrelid = t.oid AND a.attnum = k.attnum 203 | WHERE 204 | n.nspname = $1 205 | AND t.relname = $2 206 | GROUP BY 207 | i.relname, i.oid, am.amname, ix.indisunique, ix.indisprimary, 208 | ix.indisexclusion, ix.indimmediate, ix.indisclustered, ix.indisvalid, 209 | i.relpages, i.reltuples 210 | ORDER BY 211 | i.relname 212 | ), 213 | 214 | -- Get table statistics 215 | table_stats AS ( 216 | SELECT 217 | seq_scan, 218 | seq_tup_read, 219 | idx_scan, 220 | idx_tup_fetch, 221 | n_tup_ins, 222 | n_tup_upd, 223 | n_tup_del, 224 | n_tup_hot_upd, 225 | n_live_tup, 226 | n_dead_tup, 227 | n_mod_since_analyze, 228 | last_vacuum, 229 | last_autovacuum, 230 | last_analyze, 231 | last_autoanalyze, 232 | vacuum_count, 233 | autovacuum_count, 234 | analyze_count, 235 | autoanalyze_count 236 | FROM 237 | pg_stat_user_tables 238 | WHERE 239 | schemaname = $1 240 | AND relname = $2 241 | ) 242 | 243 | -- Main query to build the JSON result 244 | SELECT jsonb_build_object( 245 | 'table', 246 | jsonb_build_object( 247 | 'name', (SELECT table_name FROM table_info), 248 | 'description', (SELECT description FROM table_info), 249 | 'row_count', (SELECT row_count FROM table_info), 250 | 'size', jsonb_build_object( 251 | 'total_bytes', (SELECT total_size_bytes FROM table_info), 252 | 'table_bytes', (SELECT table_size_bytes FROM table_info), 253 | 'indexes_bytes', (SELECT indexes_size_bytes FROM table_info) 254 | ), 255 | 'columns', ( 256 | SELECT COALESCE( 257 | jsonb_agg( 258 | jsonb_build_object( 259 | 'name', c.column_name, 260 | 'type', c.data_type, 261 | 'nullable', c.is_nullable, 262 | 'default', c.column_default, 263 | 'description', c.description, 264 | 'position', c.ordinal_position, 265 | 'is_identity', c.is_identity, 266 | 'identity_generation', c.identity_generation, 267 | 'storage', c.storage_type_desc 268 | ) 269 | ORDER BY c.ordinal_position 270 | ), 271 | '[]'::jsonb 272 | ) 273 | FROM columns c 274 | ), 275 | 'constraints', jsonb_build_object( 276 | 'primary_keys', ( 277 | SELECT COALESCE( 278 | jsonb_agg( 279 | jsonb_build_object( 280 | 'name', kc.constraint_name, 281 | 'columns', kc.column_names, 282 | 'definition', kc.definition, 283 | 'description', kc.description 284 | ) 285 | ), 286 | '[]'::jsonb 287 | ) 288 | FROM key_constraints kc 289 | WHERE kc.constraint_type = 'p' 290 | ), 291 | 'unique_constraints', ( 292 | SELECT COALESCE( 293 | jsonb_agg( 294 | jsonb_build_object( 295 | 'name', kc.constraint_name, 296 | 'columns', kc.column_names, 297 | 'definition', kc.definition, 298 | 'description', kc.description 299 | ) 300 | ), 301 | '[]'::jsonb 302 | ) 303 | FROM key_constraints kc 304 | WHERE kc.constraint_type = 'u' 305 | ), 306 | 'foreign_keys', ( 307 | 
SELECT COALESCE( 308 | jsonb_agg( 309 | jsonb_build_object( 310 | 'name', fk.constraint_name, 311 | 'columns', fk.column_names, 312 | 'referenced_schema', fk.referenced_schema, 313 | 'referenced_table', fk.referenced_table, 314 | 'referenced_columns', fk.referenced_columns, 315 | 'delete_rule', fk.delete_rule, 316 | 'update_rule', fk.update_rule, 317 | 'definition', fk.definition, 318 | 'description', fk.description 319 | ) 320 | ), 321 | '[]'::jsonb 322 | ) 323 | FROM foreign_keys fk 324 | ), 325 | 'check_constraints', ( 326 | SELECT COALESCE( 327 | jsonb_agg( 328 | jsonb_build_object( 329 | 'name', cc.constraint_name, 330 | 'definition', cc.definition, 331 | 'description', cc.description 332 | ) 333 | ), 334 | '[]'::jsonb 335 | ) 336 | FROM check_constraints cc 337 | ) 338 | ), 339 | 'indexes', ( 340 | SELECT COALESCE( 341 | jsonb_agg( 342 | jsonb_build_object( 343 | 'name', i.index_name, 344 | 'type', i.index_type, 345 | 'definition', i.index_definition, 346 | 'is_unique', i.is_unique, 347 | 'is_primary', i.is_primary, 348 | 'is_valid', i.is_valid, 349 | 'column_names', i.column_names, 350 | 'column_expressions', i.column_expressions, 351 | 'size', jsonb_build_object( 352 | 'pages', i.pages, 353 | 'rows', i.rows 354 | ), 355 | 'description', i.description 356 | ) 357 | ), 358 | '[]'::jsonb 359 | ) 360 | FROM indexes i 361 | ), 362 | 'statistics', ( 363 | SELECT COALESCE( 364 | jsonb_build_object( 365 | 'seq_scan', s.seq_scan, 366 | 'idx_scan', s.idx_scan, 367 | 'live_tuples', s.n_live_tup 368 | ), 369 | '{}'::jsonb 370 | ) 371 | FROM table_stats s 372 | ) 373 | ) 374 | ) AS table_details; -------------------------------------------------------------------------------- /server/resources/sql/get_schema_view.sql: -------------------------------------------------------------------------------- 1 | -- server/resources/sql/get_schema_view.sql 2 | -- Comprehensive query to get all details for a specific materialized view 3 | -- Returns a JSON structure with columns, indexes, statistics, and view definition 4 | 5 | WITH 6 | -- Get materialized view information 7 | view_info AS ( 8 | SELECT 9 | v.relname AS view_name, 10 | obj_description(v.oid) AS description, 11 | pg_stat_get_tuples_inserted(v.oid) AS row_count, 12 | pg_total_relation_size(v.oid) AS total_size_bytes, 13 | pg_table_size(v.oid) AS data_size_bytes, 14 | pg_indexes_size(v.oid) AS indexes_size_bytes, 15 | v.relkind AS kind, 16 | -- Get the view definition SQL 17 | pg_get_viewdef(v.oid) AS view_definition 18 | FROM 19 | pg_class v 20 | JOIN 21 | pg_namespace n ON v.relnamespace = n.oid 22 | WHERE 23 | n.nspname = $1 24 | AND v.relname = $2 25 | AND v.relkind = 'm' -- 'm' = materialized view 26 | ), 27 | 28 | -- Get all columns for this materialized view 29 | columns AS ( 30 | SELECT 31 | a.attname AS column_name, 32 | pg_catalog.format_type(a.atttypid, a.atttypmod) AS data_type, 33 | NOT a.attnotnull AS is_nullable, 34 | (SELECT pg_catalog.pg_get_expr(adbin, adrelid) FROM pg_catalog.pg_attrdef d 35 | WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum AND a.atthasdef) AS column_default, 36 | col_description(a.attrelid, a.attnum) AS description, 37 | a.attnum AS ordinal_position, 38 | a.attstorage AS storage_type, 39 | CASE 40 | WHEN a.attstorage = 'p' THEN 'plain' 41 | WHEN a.attstorage = 'e' THEN 'external' 42 | WHEN a.attstorage = 'm' THEN 'main' 43 | WHEN a.attstorage = 'x' THEN 'extended' 44 | ELSE a.attstorage::text 45 | END AS storage_type_desc 46 | FROM 47 | pg_catalog.pg_attribute a 48 | JOIN 49 | pg_catalog.pg_class c ON 
c.oid = a.attrelid 50 | JOIN 51 | pg_catalog.pg_namespace n ON n.oid = c.relnamespace 52 | WHERE 53 | n.nspname = $1 54 | AND c.relname = $2 55 | AND a.attnum > 0 -- Skip system columns 56 | AND NOT a.attisdropped -- Skip dropped columns 57 | ORDER BY 58 | a.attnum 59 | ), 60 | 61 | -- Get all indexes 62 | indexes AS ( 63 | SELECT 64 | i.relname AS index_name, 65 | pg_get_indexdef(i.oid) AS index_definition, 66 | obj_description(i.oid) AS description, 67 | am.amname AS index_type, 68 | ix.indisunique AS is_unique, 69 | ix.indisprimary AS is_primary, 70 | ix.indisvalid AS is_valid, 71 | array_agg(a.attname ORDER BY array_position(ix.indkey::int[], a.attnum)) AS column_names, 72 | array_agg(pg_get_indexdef(i.oid, k.i::int, false) ORDER BY k.i) AS column_expressions 73 | FROM 74 | pg_index ix 75 | JOIN 76 | pg_class i ON i.oid = ix.indexrelid 77 | JOIN 78 | pg_class t ON t.oid = ix.indrelid 79 | JOIN 80 | pg_namespace n ON n.oid = t.relnamespace 81 | JOIN 82 | pg_am am ON i.relam = am.oid 83 | LEFT JOIN 84 | LATERAL unnest(ix.indkey::int[]) WITH ORDINALITY AS k(attnum, i) ON TRUE 85 | LEFT JOIN 86 | pg_attribute a ON a.attrelid = t.oid AND a.attnum = k.attnum 87 | WHERE 88 | n.nspname = $1 89 | AND t.relname = $2 90 | GROUP BY 91 | i.relname, i.oid, am.amname, ix.indisunique, ix.indisprimary, ix.indisvalid 92 | ORDER BY 93 | i.relname 94 | ), 95 | 96 | -- Get view statistics 97 | view_stats AS ( 98 | SELECT 99 | seq_scan, 100 | idx_scan, 101 | n_live_tup 102 | FROM 103 | pg_stat_user_tables 104 | WHERE 105 | schemaname = $1 106 | AND relname = $2 107 | ), 108 | 109 | -- Get view refresh information 110 | refresh_info AS ( 111 | SELECT 112 | c.relname, 113 | COALESCE(last_refresh_time, '1970-01-01'::timestamp) AS last_refresh_time 114 | FROM 115 | pg_class c 116 | LEFT JOIN 117 | (SELECT relid, last_refresh_time FROM pg_catalog.pg_stats_ext_matviews) sv ON c.oid = sv.relid 118 | WHERE 119 | c.relkind = 'm' 120 | AND c.relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = $1) 121 | AND c.relname = $2 122 | ) 123 | 124 | -- Main query to build the JSON result 125 | SELECT jsonb_build_object( 126 | 'materialized_view', 127 | jsonb_build_object( 128 | 'name', (SELECT view_name FROM view_info), 129 | 'description', (SELECT description FROM view_info), 130 | 'row_count', (SELECT row_count FROM view_info), 131 | 'definition', (SELECT view_definition FROM view_info), 132 | 'size', jsonb_build_object( 133 | 'total_bytes', (SELECT total_size_bytes FROM view_info), 134 | 'data_bytes', (SELECT data_size_bytes FROM view_info), 135 | 'indexes_bytes', (SELECT indexes_size_bytes FROM view_info) 136 | ), 137 | 'last_refresh', (SELECT last_refresh_time FROM refresh_info), 138 | 'columns', ( 139 | SELECT COALESCE( 140 | jsonb_agg( 141 | jsonb_build_object( 142 | 'name', c.column_name, 143 | 'type', c.data_type, 144 | 'nullable', c.is_nullable, 145 | 'default', c.column_default, 146 | 'description', c.description, 147 | 'position', c.ordinal_position, 148 | 'storage', c.storage_type_desc 149 | ) 150 | ORDER BY c.ordinal_position 151 | ), 152 | '[]'::jsonb 153 | ) 154 | FROM columns c 155 | ), 156 | 'indexes', ( 157 | SELECT COALESCE( 158 | jsonb_agg( 159 | jsonb_build_object( 160 | 'name', i.index_name, 161 | 'type', i.index_type, 162 | 'definition', i.index_definition, 163 | 'is_unique', i.is_unique, 164 | 'is_primary', i.is_primary, 165 | 'is_valid', i.is_valid, 166 | 'column_names', i.column_names, 167 | 'column_expressions', i.column_expressions, 168 | 'description', i.description 169 | ) 170 | ), 171 
| '[]'::jsonb 172 | ) 173 | FROM indexes i 174 | ), 175 | 'statistics', ( 176 | SELECT COALESCE( 177 | jsonb_build_object( 178 | 'seq_scan', s.seq_scan, 179 | 'idx_scan', s.idx_scan, 180 | 'live_tuples', s.n_live_tup 181 | ), 182 | '{}'::jsonb 183 | ) 184 | FROM view_stats s 185 | ) 186 | ) 187 | ) AS view_details; -------------------------------------------------------------------------------- /server/resources/sql/list_schemas.sql: -------------------------------------------------------------------------------- 1 | -- server/resources/sql/list_schemas.sql 2 | -- List all non-system schemas in the database 3 | -- Returns a JSON array of schema objects 4 | WITH schemas AS ( 5 | SELECT 6 | schema_name, 7 | obj_description(pg_namespace.oid) as description 8 | FROM information_schema.schemata 9 | JOIN pg_namespace ON pg_namespace.nspname = schema_name 10 | WHERE 11 | schema_name NOT IN ('pg_catalog', 'information_schema', 'pg_toast') 12 | AND schema_name NOT LIKE 'pg_%' 13 | ORDER BY schema_name 14 | ) 15 | SELECT jsonb_build_object( 16 | 'schemas', 17 | jsonb_agg( 18 | jsonb_build_object( 19 | 'name', schema_name, 20 | 'description', description 21 | ) 22 | ) 23 | ) AS schema_list 24 | FROM schemas; -------------------------------------------------------------------------------- /server/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stuzero/pg-mcp-server/199a81bcefa4020335d33bdfd84d16299aaeaf23/server/tools/__init__.py -------------------------------------------------------------------------------- /server/tools/connection.py: -------------------------------------------------------------------------------- 1 | # server/tools/connection.py 2 | from server.config import mcp 3 | from mcp.server.fastmcp import Context 4 | from server.logging_config import get_logger 5 | 6 | logger = get_logger("pg-mcp.tools.connection") 7 | 8 | def register_connection_tools(): 9 | """Register the database connection tools with the MCP server.""" 10 | logger.debug("Registering database connection tools") 11 | 12 | @mcp.tool() 13 | async def connect(connection_string: str, *, ctx: Context): 14 | """ 15 | Register a database connection string and return its connection ID. 16 | 17 | Args: 18 | connection_string: PostgreSQL connection string (required) 19 | ctx: Request context (injected by the framework) 20 | 21 | Returns: 22 | Dictionary containing the connection ID 23 | """ 24 | # Get database from context 25 | # db = ctx.request_context.lifespan_context.get("db") 26 | db = mcp.state["db"] 27 | 28 | # Register the connection to get a connection ID 29 | conn_id = db.register_connection(connection_string) 30 | 31 | # Return the connection ID 32 | logger.info(f"Registered database connection with ID: {conn_id}") 33 | return {"conn_id": conn_id} 34 | 35 | @mcp.tool() 36 | async def disconnect(conn_id: str, *, ctx: Context): 37 | """ 38 | Close a specific database connection and remove it from the pool. 
39 | 40 | Args: 41 | conn_id: Connection ID to disconnect (required) 42 | ctx: Request context (injected by the framework) 43 | 44 | Returns: 45 | Dictionary indicating success status 46 | """ 47 | # Get database from context 48 | # db = ctx.request_context.lifespan_context.get("db") 49 | db = mcp.state["db"] 50 | 51 | # Check if the connection exists 52 | if conn_id not in db._connection_map: 53 | logger.warning(f"Attempted to disconnect unknown connection ID: {conn_id}") 54 | return {"success": False, "error": "Unknown connection ID"} 55 | 56 | # Close the connection pool 57 | try: 58 | await db.close(conn_id) 59 | # Also remove from the connection mappings 60 | connection_string = db._connection_map.pop(conn_id, None) 61 | if connection_string in db._reverse_map: 62 | del db._reverse_map[connection_string] 63 | logger.info(f"Successfully disconnected database connection with ID: {conn_id}") 64 | return {"success": True} 65 | except Exception as e: 66 | logger.error(f"Error disconnecting connection {conn_id}: {e}") 67 | return {"success": False, "error": str(e)} -------------------------------------------------------------------------------- /server/tools/query.py: -------------------------------------------------------------------------------- 1 | # server/tools/query.py 2 | from server.config import mcp 3 | from mcp.server.fastmcp import Context 4 | from server.logging_config import get_logger 5 | 6 | logger = get_logger("pg-mcp.tools.query") 7 | 8 | async def execute_query(query: str, conn_id: str, params=None, ctx=Context): 9 | """ 10 | Execute a read-only SQL query against the PostgreSQL database. 11 | 12 | Args: 13 | query: The SQL query to execute (must be read-only) 14 | conn_id: Connection ID (required) 15 | params: Parameters for the query (optional) 16 | ctx: Optional request context 17 | 18 | Returns: 19 | Query results as a list of dictionaries 20 | """ 21 | 22 | # Access the database from the request context 23 | # if ctx is not None and hasattr(ctx, 'request_context'): 24 | # db = ctx.request_context.lifespan_context.get("db") 25 | # else: 26 | # raise ValueError("Database connection not available in context or MCP state.") 27 | 28 | db = mcp.state["db"] 29 | if not db: 30 | raise ValueError("Database connection not available in MCP state.") 31 | 32 | logger.info(f"Executing query on connection ID {conn_id}: {query}") 33 | 34 | async with db.get_connection(conn_id) as conn: 35 | # Ensure we're in read-only mode 36 | await conn.execute("SET TRANSACTION READ ONLY") 37 | 38 | # Execute the query 39 | try: 40 | records = await conn.fetch(query, *(params or [])) 41 | return [dict(record) for record in records] 42 | except Exception as e: 43 | # Log the error but don't couple to specific error types 44 | logger.error(f"Query execution error: {e}") 45 | raise 46 | 47 | def register_query_tools(): 48 | """Register database query tools with the MCP server.""" 49 | logger.debug("Registering query tools") 50 | 51 | @mcp.tool() 52 | async def pg_query(query: str, conn_id: str, params=None): 53 | """ 54 | Execute a read-only SQL query against the PostgreSQL database. 
55 | 56 | Args: 57 | query: The SQL query to execute (must be read-only) 58 | conn_id: Connection ID previously obtained from the connect tool 59 | params: Parameters for the query (optional) 60 | 61 | Returns: 62 | Query results as a list of dictionaries 63 | """ 64 | # Execute the query using the connection ID 65 | return await execute_query(query, conn_id, params) 66 | 67 | @mcp.tool() 68 | async def pg_explain(query: str, conn_id: str, params=None): 69 | """ 70 | Execute an EXPLAIN (FORMAT JSON) query to get PostgreSQL execution plan. 71 | 72 | Args: 73 | query: The SQL query to analyze 74 | conn_id: Connection ID previously obtained from the connect tool 75 | params: Parameters for the query (optional) 76 | 77 | Returns: 78 | Complete JSON-formatted execution plan 79 | """ 80 | # Prepend EXPLAIN to the query 81 | explain_query = f"EXPLAIN (FORMAT JSON) {query}" 82 | 83 | # Execute the explain query 84 | result = await execute_query(explain_query, conn_id, params) 85 | 86 | # Return the complete result 87 | return result -------------------------------------------------------------------------------- /server/tools/viz.py: -------------------------------------------------------------------------------- 1 | # server/tools/viz.py 2 | import json 3 | from datetime import date, datetime 4 | from decimal import Decimal 5 | from sqlglot import parse_one, exp 6 | from server.config import mcp 7 | from server.logging_config import get_logger 8 | 9 | logger = get_logger("pg-mcp.tools.viz") 10 | 11 | def pg_type_to_logical(pg_type) -> str: 12 | """Maps PostgreSQL type to logical type.""" 13 | pg_type = pg_type.name.lower() 14 | if pg_type in {"int", "int4", "int8", "float4", "float8", "numeric", "decimal", "double precision"}: 15 | return "quantitative" 16 | elif pg_type in {"date", "timestamp", "timestamptz"}: 17 | return "temporal" 18 | else: 19 | return "nominal" 20 | 21 | def default_serializer(obj): 22 | if isinstance(obj, (datetime, date)): 23 | return obj.isoformat() 24 | if isinstance(obj, Decimal): 25 | return float(obj) 26 | return str(obj) 27 | 28 | async def get_query_metadata(conn_id, sql_query): 29 | """ 30 | Analyze a SQL query and produce metadata about the results. 
31 | 32 | Args: 33 | conn_id: Database connection ID 34 | sql_query: The SQL query to analyze 35 | Returns: 36 | JSON metadata about the query results structure 37 | """ 38 | # Get database from mcp state 39 | db = mcp.state["db"] 40 | 41 | # Sanitize SQL - remove trailing semicolon 42 | sql_query = sql_query.strip() 43 | if sql_query.endswith(';'): 44 | sql_query = sql_query[:-1] 45 | 46 | metadata = { 47 | "fields": [], 48 | "rowCount": 0, 49 | "groupBy": [] 50 | } 51 | 52 | async with db.get_connection(conn_id) as conn: 53 | # --- Parse query AST --- 54 | try: 55 | ast = parse_one(sql_query) 56 | group_exprs = ast.args.get("group", []) 57 | if group_exprs: 58 | metadata["groupBy"] = [ 59 | g.name for g in group_exprs if isinstance(g, exp.Column) 60 | ] 61 | except Exception as e: 62 | logger.error(f"AST parse failed: {e}") 63 | 64 | # --- Get column names and types --- 65 | stmt = await conn.prepare(sql_query) 66 | column_attrs = stmt.get_attributes() 67 | 68 | for col in column_attrs: 69 | logical_type = pg_type_to_logical(col.type) 70 | field_meta = {"name": col.name, "type": logical_type} 71 | 72 | # Optional: try to get stats 73 | if logical_type == "nominal": 74 | query = f"SELECT COUNT(DISTINCT {col.name}) FROM ({sql_query}) AS subq" 75 | try: 76 | result = await conn.fetchval(query) 77 | field_meta["unique"] = result 78 | except Exception: 79 | pass 80 | 81 | elif logical_type == "temporal": 82 | query = f"SELECT MIN({col.name}), MAX({col.name}) FROM ({sql_query}) AS subq" 83 | try: 84 | result = await conn.fetchrow(query) 85 | if result: 86 | field_meta["range"] = [result[0], result[1]] 87 | except Exception: 88 | pass 89 | 90 | metadata["fields"].append(field_meta) 91 | 92 | # --- Row count --- 93 | try: 94 | result = await conn.fetchval(f"SELECT COUNT(*) FROM ({sql_query}) AS subq") 95 | metadata["rowCount"] = result 96 | except Exception as e: 97 | logger.error(f"Row count failed: {e}") 98 | 99 | return json.dumps(metadata, indent=2, default=default_serializer) 100 | 101 | def register_viz_tools(): 102 | """Register visualization tools with the MCP server.""" 103 | logger.debug("Registering vizualization tools") 104 | 105 | @mcp.tool() 106 | async def pg_metadata(conn_id: str, sql_query: str): 107 | """ 108 | Analyzes a SQL query and produces visualization metadata. 109 | 110 | Args: 111 | conn_id: Connection ID previously obtained from the connect tool 112 | sql_query: The SQL query to analyze 113 | 114 | Returns: 115 | JSON metadata about the query results structure 116 | """ 117 | # Call the function to get query metadata 118 | return await get_query_metadata(conn_id, sql_query) -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | # test.py 2 | import asyncio 3 | import httpx 4 | import json 5 | import sys 6 | from mcp import ClientSession 7 | from mcp.client.sse import sse_client 8 | 9 | async def run(connection_string: str | None): 10 | """Test the MCP server with an optional database connection string.""" 11 | # Assuming your server is running on localhost:8000 12 | server_url = "http://localhost:8000/sse" 13 | 14 | try: 15 | print(f"Connecting to MCP server at {server_url}...") 16 | if connection_string: 17 | # Clean and sanitize the connection string 18 | clean_connection = connection_string.strip() 19 | # Only show a small part of the connection string for security 20 | masked_conn_string = clean_connection[:10] + "..." 
if len(clean_connection) > 10 else clean_connection 21 | print(f"Using database connection: {masked_conn_string}") 22 | 23 | # Create the SSE client context manager 24 | async with sse_client(url=server_url) as streams: 25 | print("SSE streams established, creating session...") 26 | 27 | # Create and initialize the MCP ClientSession 28 | async with ClientSession(*streams) as session: 29 | print("Session created, initializing...") 30 | 31 | # Initialize the connection 32 | await session.initialize() 33 | print("Connection initialized!") 34 | 35 | # List available prompts 36 | prompts_response = await session.list_prompts() 37 | print(f"Available prompts: {prompts_response}") 38 | 39 | # List available tools 40 | tools_response = await session.list_tools() 41 | tools = tools_response.tools 42 | print(f"Available tools: {[tool.name for tool in tools]}") 43 | 44 | # List available resources 45 | resources_response = await session.list_resources() 46 | print(f"Available resources: {resources_response}") 47 | 48 | # List available resource templates 49 | templates_response = await session.list_resource_templates() 50 | print(f"Available resource templates: {templates_response}") 51 | 52 | # Test with a connection if provided 53 | if connection_string: 54 | # Check if required tools are available 55 | has_connect = any(tool.name == 'connect' for tool in tools) 56 | has_pg_query = any(tool.name == 'pg_query' for tool in tools) 57 | 58 | if not has_connect: 59 | print("\nERROR: 'connect' tool is not available on the server") 60 | return 61 | 62 | if not has_pg_query: 63 | print("\nERROR: 'pg_query' tool is not available on the server") 64 | return 65 | 66 | try: 67 | # Use the cleaned connection string 68 | clean_connection = connection_string.strip() 69 | 70 | # First, register the connection to get a conn_id 71 | print("\nRegistering connection with 'connect' tool...") 72 | connect_result = await session.call_tool( 73 | "connect", 74 | { 75 | "connection_string": clean_connection 76 | } 77 | ) 78 | 79 | # Extract conn_id from the response 80 | conn_id = None 81 | if hasattr(connect_result, 'content') and connect_result.content: 82 | content = connect_result.content[0] 83 | if hasattr(content, 'text'): 84 | try: 85 | result_data = json.loads(content.text) 86 | conn_id = result_data.get('conn_id') 87 | print(f"Successfully connected with connection ID: {conn_id}") 88 | except json.JSONDecodeError: 89 | print(f"Error parsing connect result: {content.text[:100]}") 90 | 91 | if not conn_id: 92 | print("Failed to get connection ID from connect tool") 93 | return 94 | 95 | # Test pg_query using the conn_id 96 | print("\nTesting 'pg_query' tool with connection ID...") 97 | query_result = await session.call_tool( 98 | "pg_query", 99 | { 100 | "query": "SELECT version() AS version", 101 | "conn_id": conn_id 102 | } 103 | ) 104 | 105 | # Process the query result 106 | if hasattr(query_result, 'content') and query_result.content: 107 | content = query_result.content[0] 108 | if hasattr(content, 'text'): 109 | try: 110 | version_data = json.loads(content.text) 111 | if isinstance(version_data, list) and len(version_data) > 0: 112 | print(f"Query executed successfully: {version_data[0].get('version', 'Unknown')}") 113 | else: 114 | print(f"Query executed successfully: {version_data}") 115 | except json.JSONDecodeError: 116 | print(f"Error parsing query result: {content.text[:100]}") 117 | else: 118 | print("Query executed but text content not available") 119 | else: 120 | print("Query executed but no content 
returned") 121 | 122 | # Test pg_explain if available 123 | has_pg_explain = any(tool.name == 'pg_explain' for tool in tools) 124 | if has_pg_explain: 125 | print("\nTesting 'pg_explain' tool...") 126 | explain_result = await session.call_tool( 127 | "pg_explain", 128 | { 129 | "query": "SELECT version()", 130 | "conn_id": conn_id 131 | } 132 | ) 133 | 134 | if hasattr(explain_result, 'content') and explain_result.content: 135 | content = explain_result.content[0] 136 | if hasattr(content, 'text'): 137 | try: 138 | explain_data = json.loads(content.text) 139 | print(f"EXPLAIN query executed successfully. Result contains {len(explain_data)} rows.") 140 | # Pretty print a snippet of the execution plan 141 | print(json.dumps(explain_data, indent=2)[:500] + "...") 142 | except json.JSONDecodeError: 143 | print(f"Error parsing EXPLAIN result: {content.text[:100]}") 144 | 145 | # Test resources with the conn_id 146 | print("\nTesting schema resources with connection ID...") 147 | schema_resource = f"pgmcp://{conn_id}/schemas" 148 | schema_response = await session.read_resource(schema_resource) 149 | 150 | # Process schema response 151 | response_content = None 152 | if hasattr(schema_response, 'content') and schema_response.content: 153 | response_content = schema_response.content 154 | elif hasattr(schema_response, 'contents') and schema_response.contents: 155 | response_content = schema_response.contents 156 | 157 | if response_content: 158 | content_item = response_content[0] 159 | if hasattr(content_item, 'text'): 160 | try: 161 | schemas_data = json.loads(content_item.text) 162 | print(f"Successfully retrieved {len(schemas_data)} schemas") 163 | 164 | # Print first few schemas 165 | for i, schema in enumerate(schemas_data[:3]): 166 | schema_name = schema.get('schema_name') 167 | print(f" - {schema_name}") 168 | if i >= 2 and len(schemas_data) > 3: 169 | print(f" ... 
and {len(schemas_data) - 3} more") 170 | break 171 | 172 | # If we have schemas, test extensions resource 173 | if schemas_data and len(schemas_data) > 0: 174 | schema_name = schemas_data[0].get('schema_name') 175 | print(f"\nTesting extensions for schema '{schema_name}'...") 176 | extensions_resource = f"pgmcp://{conn_id}/schemas/{schema_name}/extensions" 177 | 178 | try: 179 | extensions_response = await session.read_resource(extensions_resource) 180 | 181 | # Process extensions response 182 | ext_content = None 183 | if hasattr(extensions_response, 'content') and extensions_response.content: 184 | ext_content = extensions_response.content 185 | elif hasattr(extensions_response, 'contents') and extensions_response.contents: 186 | ext_content = extensions_response.contents 187 | 188 | if ext_content: 189 | content_item = ext_content[0] 190 | if hasattr(content_item, 'text'): 191 | extensions_data = json.loads(content_item.text) 192 | print(f"Successfully retrieved {len(extensions_data)} extensions") 193 | 194 | # Print extensions and check for context 195 | for ext in extensions_data: 196 | has_context = ext.get('context_available', False) 197 | context_flag = " (has context)" if has_context else "" 198 | print(f" - {ext.get('name')} v{ext.get('version')}{context_flag}") 199 | 200 | # If extension has context, test getting it 201 | if has_context: 202 | ext_name = ext.get('name') 203 | print(f"\nFetching context for extension '{ext_name}'...") 204 | context_resource = f"pgmcp://{conn_id}/schemas/{schema_name}/extensions/{ext_name}" 205 | 206 | try: 207 | context_response = await session.read_resource(context_resource) 208 | 209 | ctx_content = None 210 | if hasattr(context_response, 'content') and context_response.content: 211 | ctx_content = context_response.content 212 | elif hasattr(context_response, 'contents') and context_response.contents: 213 | ctx_content = context_response.contents 214 | 215 | if ctx_content: 216 | content_item = ctx_content[0] 217 | if hasattr(content_item, 'text'): 218 | try: 219 | context_data = content_item.text 220 | if isinstance(context_data, str) and context_data.strip(): 221 | print(f"Retrieved context information for {ext_name}") 222 | # Don't print the whole context, just confirm it exists 223 | yaml_data = json.loads(context_data) 224 | print(f"Context contains sections: {', '.join(yaml_data.keys())}") 225 | else: 226 | print(f"Empty context received for {ext_name}") 227 | except json.JSONDecodeError: 228 | # Might be YAML directly 229 | print(f"Retrieved non-JSON context for {ext_name}") 230 | except Exception as e: 231 | print(f"Error fetching extension context: {e}") 232 | except Exception as e: 233 | print(f"Error fetching extensions: {e}") 234 | 235 | # Find a schema with tables to test table resources 236 | for schema_idx, schema in enumerate(schemas_data[:3]): 237 | schema_name = schema.get('schema_name') 238 | 239 | print(f"\nTesting tables for schema '{schema_name}'...") 240 | tables_resource = f"pgmcp://{conn_id}/schemas/{schema_name}/tables" 241 | tables_response = await session.read_resource(tables_resource) 242 | 243 | # Process tables response 244 | tables_content = None 245 | if hasattr(tables_response, 'content') and tables_response.content: 246 | tables_content = tables_response.content 247 | elif hasattr(tables_response, 'contents') and tables_response.contents: 248 | tables_content = tables_response.contents 249 | 250 | if tables_content: 251 | content_item = tables_content[0] 252 | if hasattr(content_item, 'text'): 253 | 
tables_data = json.loads(content_item.text) 254 | print(f"Found {len(tables_data)} tables in schema '{schema_name}'") 255 | 256 | if tables_data and len(tables_data) > 0: 257 | # Print first few tables 258 | for i, table in enumerate(tables_data[:3]): 259 | table_name = table.get('table_name') 260 | print(f" - {table_name}") 261 | if i >= 2 and len(tables_data) > 3: 262 | print(f" ... and {len(tables_data) - 3} more") 263 | break 264 | 265 | # Test table details for first table 266 | table_name = tables_data[0].get('table_name') 267 | print(f"\nTesting columns for table '{schema_name}.{table_name}'...") 268 | 269 | columns_resource = f"pgmcp://{conn_id}/schemas/{schema_name}/tables/{table_name}/columns" 270 | columns_response = await session.read_resource(columns_resource) 271 | 272 | # Process columns response 273 | cols_content = None 274 | if hasattr(columns_response, 'content') and columns_response.content: 275 | cols_content = columns_response.content 276 | elif hasattr(columns_response, 'contents') and columns_response.contents: 277 | cols_content = columns_response.contents 278 | 279 | if cols_content: 280 | content_item = cols_content[0] 281 | if hasattr(content_item, 'text'): 282 | columns_data = json.loads(content_item.text) 283 | print(f"Found {len(columns_data)} columns in table '{table_name}'") 284 | 285 | # Print first few columns 286 | for i, col in enumerate(columns_data[:3]): 287 | col_name = col.get('column_name') 288 | data_type = col.get('data_type') 289 | print(f" - {col_name} ({data_type})") 290 | if i >= 2 and len(columns_data) > 3: 291 | print(f" ... and {len(columns_data) - 3} more") 292 | break 293 | 294 | # Test disconnect tool if available 295 | break # Exit schema loop once we've found a table 296 | except json.JSONDecodeError: 297 | print(f"Error parsing schemas: {content_item.text[:100]}") 298 | 299 | # Finally, test the disconnect tool if available 300 | has_disconnect = any(tool.name == 'disconnect' for tool in tools) 301 | if has_disconnect and conn_id: 302 | print("\nTesting 'disconnect' tool...") 303 | disconnect_result = await session.call_tool( 304 | "disconnect", 305 | { 306 | "conn_id": conn_id 307 | } 308 | ) 309 | 310 | if hasattr(disconnect_result, 'content') and disconnect_result.content: 311 | content = disconnect_result.content[0] 312 | if hasattr(content, 'text'): 313 | try: 314 | result_data = json.loads(content.text) 315 | success = result_data.get('success', False) 316 | if success: 317 | print(f"Successfully disconnected connection {conn_id}") 318 | else: 319 | error = result_data.get('error', 'Unknown error') 320 | print(f"Failed to disconnect: {error}") 321 | except json.JSONDecodeError: 322 | print(f"Error parsing disconnect result: {content.text[:100]}") 323 | else: 324 | print("Disconnect call completed but no result returned") 325 | 326 | except Exception as e: 327 | print(f"Error during connection tests: {e}") 328 | else: 329 | print("\nNo connection string provided, skipping database tests") 330 | 331 | except httpx.HTTPStatusError as e: 332 | print(f"HTTP Error: {e}") 333 | print(f"Status code: {e.response.status_code}") 334 | print(f"Response body: {e.response.text}") 335 | except httpx.ConnectError: 336 | print(f"Connection Error: Could not connect to server at {server_url}") 337 | print("Make sure the server is running and the URL is correct") 338 | except Exception as e: 339 | print(f"Error: {type(e).__name__}: {e}") 340 | 341 | if __name__ == "__main__": 342 | # Get database connection string from command line argument 343 | 
connection_string = sys.argv[1] if len(sys.argv) > 1 else None 344 | asyncio.run(run(connection_string)) --------------------------------------------------------------------------------
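
The test client above exercises the connect, pg_query, pg_explain, and disconnect tools plus the schema resources, but it never calls the pg_metadata visualization tool defined in server/tools/viz.py. The sketch below (not a file in the repository) shows one way a client could invoke it over the same SSE transport and tool-calling pattern used in test.py; the server URL matches the repository default, while the DATABASE_URL value and the example SQL query are placeholder assumptions you would replace with your own.

# example_pg_metadata_client.py -- illustrative sketch only, not part of the repository
import asyncio
import json

from mcp import ClientSession
from mcp.client.sse import sse_client

SERVER_URL = "http://localhost:8000/sse"  # same default endpoint as test.py
DATABASE_URL = "postgresql://user:password@hostname:5432/databasename"  # placeholder DSN

async def main():
    async with sse_client(url=SERVER_URL) as streams:
        async with ClientSession(*streams) as session:
            await session.initialize()

            # Register the connection string and extract the conn_id,
            # following the same JSON parsing pattern as test.py.
            connect_result = await session.call_tool(
                "connect", {"connection_string": DATABASE_URL}
            )
            conn_id = json.loads(connect_result.content[0].text)["conn_id"]

            # Ask the viz tool for metadata (fields, rowCount, groupBy) about
            # an example aggregate query; the query itself is just a placeholder.
            metadata_result = await session.call_tool(
                "pg_metadata",
                {
                    "conn_id": conn_id,
                    "sql_query": (
                        "SELECT schemaname, COUNT(*) AS tables "
                        "FROM pg_tables GROUP BY schemaname"
                    ),
                },
            )
            print(metadata_result.content[0].text)

            # Release the pooled connection when finished.
            await session.call_tool("disconnect", {"conn_id": conn_id})

if __name__ == "__main__":
    asyncio.run(main())

The returned JSON should mirror what get_query_metadata builds: a "fields" array with logical types (quantitative, temporal, nominal), a "rowCount", and any "groupBy" columns detected from the query's AST, which an agent can feed into the Vega generation prompt templates.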