├── puppygraph ├── data │ ├── __init__.py │ ├── host │ │ ├── __init__.py │ │ └── host_config.py │ ├── mapping │ │ ├── __init__.py │ │ ├── database_params.py │ │ ├── graph_element_cache_config.py │ │ ├── catalog_config.py │ │ ├── datalake_params.py │ │ └── graph_mapping_config.py │ └── schema │ │ ├── __init__.py │ │ └── graph_schema.py ├── client │ ├── __init__.py │ └── client.py ├── common │ ├── __init__.py │ ├── dataclass_utils.py │ ├── conversion_utils.py │ └── test_conversion_utils.py ├── rag │ ├── __init__.py │ └── graph_agent.py └── __init__.py ├── apps ├── chatbot │ ├── .gitignore │ ├── requirements.txt │ ├── .env.example │ ├── run.sh │ ├── CLAUDE.md │ ├── test_query_limits.py │ ├── test_integration.py │ ├── README.md │ ├── mcp_server.py │ ├── rag_system.py │ ├── backend.py │ └── gradio_app.py ├── databricks_mining_site │ ├── README.md │ ├── run_agent.py │ └── set_graph_schema.py └── imdb │ └── run_agent.py ├── pyproject.toml ├── .gitignore ├── README.md └── LICENSE /puppygraph/data/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /puppygraph/client/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /puppygraph/common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /puppygraph/data/host/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /puppygraph/data/mapping/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/puppygraph/data/schema/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /puppygraph/rag/__init__.py: -------------------------------------------------------------------------------- 1 | from puppygraph.rag.graph_agent import PuppyGraphAgent 2 | 3 | __all__ = ["PuppyGraphAgent"] 4 | -------------------------------------------------------------------------------- /apps/chatbot/.gitignore: -------------------------------------------------------------------------------- 1 | # Virtual environment 2 | venv/ 3 | 4 | # Environment variables 5 | .env 6 | 7 | # Python cache 8 | __pycache__/ -------------------------------------------------------------------------------- /puppygraph/__init__.py: -------------------------------------------------------------------------------- 1 | from puppygraph.client.client import PuppyGraphClient 2 | from puppygraph.data.host.host_config import PuppyGraphHostConfig 3 | from puppygraph.data.schema.graph_schema import PuppyGraphSchema 4 | 5 | __all__ = ["PuppyGraphClient", "PuppyGraphHostConfig", "PuppyGraphSchema"] 6 | -------------------------------------------------------------------------------- /apps/chatbot/requirements.txt: -------------------------------------------------------------------------------- 1 | gradio==5.45.0 2 | mcp==1.14.0 3 | neo4j==5.28.2 4 | requests==2.32.5 5 | sentence-transformers==5.1.0 6 | chromadb==1.0.21 7 | langchain==0.3.27 8 | langchain-community==0.3.29 9 | anthropic==0.67.0 10 | pydantic==2.11.7 11 | uvicorn==0.35.0 12 | fastapi==0.116.1 13 | python-dotenv==1.1.1 -------------------------------------------------------------------------------- /apps/chatbot/.env.example: -------------------------------------------------------------------------------- 1 | # Anthropic API Key for text-to-cypher generation 2 | ANTHROPIC_API_KEY=your_anthropic_api_key_here 3 | 4 | # PuppyGraph 
Configuration 5 | PUPPYGRAPH_BOLT_URI=bolt://localhost:7687 6 | PUPPYGRAPH_HTTP_URI=http://localhost:8081 7 | PUPPYGRAPH_USERNAME=puppygraph 8 | PUPPYGRAPH_PASSWORD=puppygraph123 9 | 10 | # Optional: Gradio Configuration 11 | GRADIO_SERVER_PORT=7860 12 | GRADIO_SERVER_NAME=0.0.0.0 -------------------------------------------------------------------------------- /puppygraph/data/mapping/database_params.py: -------------------------------------------------------------------------------- 1 | """Defines Database parameters for a catalog.""" 2 | 3 | from dataclasses import dataclass 4 | from typing import List, Optional, Union 5 | 6 | 7 | @dataclass(frozen=True) 8 | class JDBCParam: 9 | """JDBC connection parameters for a database.""" 10 | 11 | username: str 12 | password: str 13 | jdbc_uri: str 14 | driver_class: str 15 | driver_url: str 16 | 17 | 18 | @dataclass(frozen=True) 19 | class ElasticSearchParam: 20 | """ElasticSearch connection parameters for a database.""" 21 | 22 | hosts: List[str] 23 | username: Optional[str] = None 24 | password: Optional[str] = None 25 | 26 | 27 | DatabaseParams = Union[JDBCParam, ElasticSearchParam] 28 | -------------------------------------------------------------------------------- /apps/databricks_mining_site/README.md: -------------------------------------------------------------------------------- 1 | # PuppyGraphAgent for Mining Site (Databricks Blog) 2 | 3 | ## Overview 4 | This project demonstrates the use of PuppyGraphAgent for processing and analyzing data in a mining site environment using Databricks. 5 | 6 | ## Tables 7 | 8 | - **Work Orders**: `pg_databricks.gold.work_orders` 9 | - **Troubleshooting Guide**: `pg_databricks.silver.troubleshooting_guide` 10 | - **Assets**: `pg_databricks.silver.assets` 11 | - **Failure Type**: `pg_databricks.bronze.failure_type` 12 | 13 | ## How to Run 14 | 15 | 1. Set up the schema connection to PuppyGraph: 16 | ```bash 17 | python set_schema.py 18 | ``` 19 | 2. 
Run the graph agent: 20 | ```bash 21 | python run_agent.py 22 | ``` 23 | 24 | ## Requirements 25 | 26 | See [tool.poetry.group.app.dependencies] for the required dependencies. 27 | -------------------------------------------------------------------------------- /puppygraph/data/host/host_config.py: -------------------------------------------------------------------------------- 1 | """Config for PuppyGraph server.""" 2 | 3 | from dataclasses import dataclass 4 | 5 | 6 | @dataclass(frozen=True) 7 | class PuppyGraphHostConfig: 8 | """PuppyGraph host configuration.""" 9 | 10 | # IP address of the PuppyGraph host 11 | ip: str 12 | 13 | # HTTP port of the PuppyGraph server 14 | http_port: int = 8081 15 | 16 | # Cypher query port of the PuppyGraph server 17 | cypher_port: int = 7687 18 | 19 | # Maximum connection lifetime for the Cypher query driver 20 | cypher_max_connection_lifetime: int = 30 21 | 22 | # Gremlin query port of the PuppyGraph server 23 | gremlin_port: int = 8182 24 | 25 | # Username to authenticate with the PuppyGraph server 26 | username: str = "puppygraph" 27 | 28 | # Password to authenticate with the PuppyGraph server 29 | password: str = "puppygraph123" 30 | -------------------------------------------------------------------------------- /puppygraph/data/mapping/graph_element_cache_config.py: -------------------------------------------------------------------------------- 1 | """PuppyGraph Element Cache config.""" 2 | 3 | from dataclasses import dataclass 4 | from enum import Enum 5 | from typing import Optional 6 | 7 | 8 | class TimeUnit(Enum): 9 | """Enum for time units in cache TTL.""" 10 | 11 | YEAR = 0 12 | MONTH = 1 13 | DAY = 2 14 | HOUR = 3 15 | MINUTE = 4 16 | PARTITION = 5 17 | 18 | 19 | @dataclass(frozen=True) 20 | class TimeToLive: 21 | """Time-to-live configuration for cache.""" 22 | 23 | amount: int 24 | unit: TimeUnit 25 | 26 | 27 | @dataclass(frozen=True) 28 | class RefreshStrategy: 29 | """Refresh strategy configuration for cache.""" 
30 | 31 | parallelism: int 32 | 33 | 34 | @dataclass(frozen=True) 35 | class GraphElementCacheConfig: 36 | """Cache configuration for elements.""" 37 | 38 | partition_key: str 39 | partition_ttl: Optional[TimeToLive] = None 40 | refresh_strategy: Optional[RefreshStrategy] = None 41 | -------------------------------------------------------------------------------- /puppygraph/common/dataclass_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for working with dataclasses.""" 2 | 3 | from dataclasses import asdict 4 | from enum import Enum 5 | 6 | 7 | def dataclass_to_camel_dict(instance) -> dict: 8 | """Convert a dataclass instance to a camelCase dictionary, omitting fields with None values.""" 9 | if not hasattr(instance, "__dataclass_fields__"): 10 | raise ValueError("The provided instance is not a dataclass.") 11 | 12 | def _snake_to_camel(snake_str: str) -> str: 13 | """Convert a snake_case string to camelCase.""" 14 | components = snake_str.split("_") 15 | return components[0] + "".join(x.title() for x in components[1:]) 16 | 17 | def _convert_value(value): 18 | """Convert Enum to its name, and keep other types as they are.""" 19 | if isinstance(value, Enum): 20 | return value.name 21 | return value 22 | 23 | return { 24 | _snake_to_camel(k): _convert_value(v) 25 | for k, v in asdict(instance).items() 26 | if v is not None 27 | } 28 | -------------------------------------------------------------------------------- /puppygraph/data/mapping/catalog_config.py: -------------------------------------------------------------------------------- 1 | """Catalog configuration dataclasses and enums. 2 | 3 | This module defines the access configurations for different types of data sources. 
4 | """ 5 | 6 | from dataclasses import dataclass 7 | from enum import Enum 8 | from typing import Union 9 | 10 | from puppygraph.data.mapping.database_params import DatabaseParams 11 | from puppygraph.data.mapping.datalake_params import DatalakeParams 12 | 13 | 14 | class CatalogType(Enum): 15 | """Defines the source type of a catalog.""" 16 | 17 | UNKNOWN = 0 18 | HIVE = 1 19 | ICEBERG = 2 20 | HUDI = 3 21 | DELTALAKE = 4 22 | MYSQL = 5 23 | POSTGRESQL = 6 24 | ELASTICSEARCH = 7 25 | REDSHIFT = 8 26 | DUCKDB = 9 27 | BIGQUERY = 10 28 | SNOWFLAKE = 11 29 | TRINO = 12 30 | VERTICA = 13 31 | SINGLESTORE = 14 32 | 33 | 34 | @dataclass(frozen=True) 35 | class CatalogConfig: 36 | """Defines the source of a catalog for PuppyGraph to construct the graph from.""" 37 | 38 | name: str 39 | type: CatalogType 40 | params: Union[DatalakeParams, DatabaseParams] 41 | -------------------------------------------------------------------------------- /puppygraph/data/schema/graph_schema.py: -------------------------------------------------------------------------------- 1 | """PuppyGraph schema definition.""" 2 | 3 | from dataclasses import dataclass, field 4 | from typing import List, Optional 5 | 6 | 7 | @dataclass(frozen=True) 8 | class AttributeSchema: 9 | """Attribute schema.""" 10 | 11 | type: str 12 | name: str 13 | description: Optional[str] = None 14 | 15 | 16 | @dataclass(frozen=True) 17 | class VertexSchema: 18 | """Vertex schema.""" 19 | 20 | label: str 21 | attributes: List[AttributeSchema] = field(default_factory=list) 22 | description: Optional[str] = None 23 | 24 | 25 | @dataclass(frozen=True) 26 | class EdgeSchema: 27 | """Edge schema.""" 28 | 29 | label: str 30 | from_vertex_label: str 31 | to_vertex_label: str 32 | attributes: List[AttributeSchema] = field(default_factory=list) 33 | description: Optional[str] = None 34 | 35 | 36 | @dataclass(frozen=True) 37 | class PuppyGraphSchema: 38 | """The PuppyGraph schema.""" 39 | 40 | vertex_schemas: List[VertexSchema] = 
field(default_factory=list) 41 | edge_schemas: List[EdgeSchema] = field(default_factory=list) 42 | description: Optional[str] = None 43 | -------------------------------------------------------------------------------- /puppygraph/data/mapping/datalake_params.py: -------------------------------------------------------------------------------- 1 | """Defines Datalake parameters for a catalog.""" 2 | 3 | from dataclasses import dataclass 4 | from typing import Optional, Union 5 | 6 | 7 | @dataclass(frozen=True) 8 | class UnityMetastoreParam: 9 | """Unity metastore parameters.""" 10 | 11 | token: Optional[str] = None 12 | host: Optional[str] = None 13 | unity_catalog_name: Optional[str] = None 14 | 15 | 16 | @dataclass(frozen=True) 17 | class S3StorageParam: 18 | """S3 storage parameters.""" 19 | 20 | use_instance_profile: Optional[str] = None 21 | region: Optional[str] = None 22 | access_key: Optional[str] = None 23 | secret_key: Optional[str] = None 24 | iam_role_arn: Optional[str] = None 25 | enable_ssl: Optional[str] = None 26 | endpoint: Optional[str] = None 27 | enable_path_style_access: Optional[str] = None 28 | 29 | 30 | MetastoreParam = Union[UnityMetastoreParam] 31 | StorageParam = Union[S3StorageParam] 32 | 33 | 34 | @dataclass(frozen=True) 35 | class DatalakeParams: 36 | """Datalake parameters for a catalog.""" 37 | 38 | metastore_param: Optional[MetastoreParam] = None 39 | storage_param: Optional[StorageParam] = None 40 | -------------------------------------------------------------------------------- /apps/chatbot/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # PuppyGraph RAG Chatbot Launcher Script 4 | 5 | set -e 6 | 7 | echo "🐶 PuppyGraph RAG Chatbot Demo" 8 | echo "================================" 9 | 10 | # Check if virtual environment exists 11 | if [ ! -d "venv" ]; then 12 | echo "📦 Creating virtual environment..." 
13 | python3 -m venv venv 14 | fi 15 | 16 | # Activate virtual environment 17 | echo "🔄 Activating virtual environment..." 18 | source venv/bin/activate 19 | 20 | # Install dependencies 21 | echo "📥 Installing dependencies..." 22 | pip install -r requirements.txt 23 | 24 | # Check if .env file exists 25 | if [ ! -f ".env" ]; then 26 | echo "⚠️ .env file not found. Copying from .env.example..." 27 | cp .env.example .env 28 | echo "🔧 Please edit .env with your configuration before running!" 29 | echo " Especially set your ANTHROPIC_API_KEY" 30 | fi 31 | 32 | # Run integration tests 33 | echo "🧪 Running integration tests..." 34 | python test_integration.py 35 | 36 | if [ $? -eq 0 ]; then 37 | echo "✅ Integration tests passed!" 38 | echo "" 39 | echo "🚀 Starting PuppyGraph RAG Chatbot..." 40 | echo " Access the UI at: http://localhost:7860" 41 | echo " Press Ctrl+C to stop" 42 | echo "" 43 | 44 | # Start the application 45 | python gradio_app.py 46 | else 47 | echo "❌ Integration tests failed. Please check the configuration." 48 | exit 1 49 | fi -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core>=1.0.0"] 3 | build-backend = "poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "puppygraph" 7 | version = "0.1.5" 8 | description = "The project contains the PuppyGraph client and PuppyGraph's Agentic Graph RAG libraries for Python. PuppyGraph client enables dynamic graph querying across multiple data sources with zero ETL, flexible schema management, and support for Cypher and Gremlin queries. PuppyGraph agentic graph RAG library makes graph-aware AI Agents." 
9 | authors = ["PuppyGraph "] 10 | license = "Apache-2.0" 11 | readme = "README.md" 12 | packages=[{include = "puppygraph"}] 13 | exclude = ["puppygraph/**/test*_.py"] 14 | 15 | 16 | [tool.poetry.dependencies] 17 | python = ">=3.9,<3.13" 18 | requests = "^2.28" 19 | gremlinpython = "^3.7.2" 20 | neo4j = "^5.7.0" 21 | dacite = "^1.7.0" 22 | async-timeout="^4.0.3" 23 | langchain-core = "^0.3.51" 24 | jinja2 = "^3.1.4" 25 | 26 | [tool.poetry.group.dev] 27 | optional = true 28 | 29 | [tool.poetry.group.dev.dependencies] 30 | pytest = "^7.4" 31 | pytest-cov = "^4.1" 32 | mypy = "^1.10" 33 | 34 | 35 | [tool.poetry.group.apps] 36 | optional = true 37 | 38 | [tool.poetry.group.apps.dependencies] 39 | langchain-openai = "^0.3.12" 40 | langchain-community = "^0.3.21" 41 | langchain-anthropic = "^0.2.4" 42 | gradio = "^4.42.0" 43 | 44 | 45 | [tool.mypy] 46 | ignore_missing_imports = true 47 | disallow_untyped_defs = true 48 | disallow_untyped_calls = true 49 | -------------------------------------------------------------------------------- /puppygraph/data/mapping/graph_mapping_config.py: -------------------------------------------------------------------------------- 1 | """PuppyGraph construction config.""" 2 | 3 | from dataclasses import dataclass, field 4 | from typing import List, Optional 5 | 6 | from puppygraph.data.mapping.catalog_config import CatalogConfig 7 | from puppygraph.data.mapping.graph_element_cache_config import ( 8 | GraphElementCacheConfig, 9 | ) 10 | 11 | 12 | @dataclass(frozen=True) 13 | class TableSource: 14 | """Table source.""" 15 | 16 | catalog_name: str 17 | schema_name: str 18 | table_name: str 19 | 20 | 21 | @dataclass(frozen=True) 22 | class MappedField: 23 | """MappedAttributes from the source table to the graph schema.""" 24 | 25 | name: str 26 | type: str 27 | from_field: str 28 | description: Optional[str] = None 29 | 30 | 31 | @dataclass(frozen=True) 32 | class GraphElementConfig: 33 | """Graph Element config. 
34 | 35 | A graph element is a vertex or an edge. 36 | """ 37 | 38 | table_source: TableSource 39 | 40 | label: str 41 | 42 | # List of attributes, each attribute is a field in the source table 43 | attributes: List[MappedField] 44 | 45 | # The ID can from single field or list of fields (composite key) 46 | id: List[MappedField] 47 | 48 | # Description of the element 49 | description: Optional[str] = None 50 | 51 | # Cache config 52 | cache_config: Optional[GraphElementCacheConfig] = None 53 | 54 | # ID of the from vertex, only applicable if the element is an edge 55 | from_id: Optional[List[MappedField]] = None 56 | 57 | # ID of the to vertex, only applicable if the element is an edge 58 | to_id: Optional[List[MappedField]] = None 59 | 60 | # Label of the from vertex, only applicable if the element is an edge 61 | from_label: Optional[str] = None 62 | 63 | # Label of the to vertex, only applicable if the element is an edge 64 | to_label: Optional[str] = None 65 | 66 | 67 | @dataclass(frozen=True) 68 | class PuppyGraphMappingConfig: 69 | """PuppyGraph Mapping config. 70 | 71 | This config defines how the graph is mapped from the source tables. 72 | """ 73 | 74 | # Catalogs to fetch tables from 75 | catalogs: List[CatalogConfig] = field(default_factory=list) 76 | 77 | vertices: List[GraphElementConfig] = field(default_factory=list) 78 | 79 | edges: List[GraphElementConfig] = field(default_factory=list) 80 | 81 | # description of the graph 82 | description: Optional[str] = None 83 | 84 | 85 | if __name__ == "__main__": 86 | pass 87 | # print(GraphElementConfig) 88 | # print(PuppyGraphConstruction 89 | -------------------------------------------------------------------------------- /apps/chatbot/CLAUDE.md: -------------------------------------------------------------------------------- 1 | # CLAUDE.md 2 | 3 | This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 
4 | 5 | ## Development Commands 6 | 7 | ### Quick Start 8 | ```bash 9 | # Run the complete application with setup 10 | ./run.sh 11 | 12 | # Or run individual components: 13 | python gradio_app.py # Start web UI at http://localhost:7860 14 | python mcp_server.py # Run MCP server standalone 15 | python test_integration.py # Run integration tests 16 | ``` 17 | 18 | ### Setup Commands 19 | ```bash 20 | # Install dependencies 21 | pip install -r requirements.txt 22 | 23 | # Create and activate virtual environment 24 | python3 -m venv venv 25 | source venv/bin/activate 26 | 27 | # Environment setup 28 | cp .env.example .env # Edit with your API keys 29 | ``` 30 | 31 | ### Testing 32 | ```bash 33 | # Integration tests (checks all components) 34 | python test_integration.py 35 | 36 | # Test individual components 37 | python -c "from backend import get_chatbot; chatbot = get_chatbot(); print(chatbot.get_graph_stats())" 38 | python -c "from rag_system import TextToCypherRAG; rag = TextToCypherRAG(); print('RAG system OK')" 39 | ``` 40 | 41 | ## Architecture Overview 42 | 43 | ### Core Components 44 | This is a RAG-powered chatbot that converts natural language queries to Cypher queries for PuppyGraph: 45 | 46 | 1. **gradio_app.py** - Main web interface (Gradio UI) 47 | 2. **backend.py** - Central coordinator (`PuppyGraphChatbot` class) 48 | 3. **rag_system.py** - Text-to-Cypher conversion using embeddings + Claude Sonnet 4.0 49 | 4. 
**mcp_server.py** - Model Context Protocol server for PuppyGraph operations 50 | 51 | ### Data Flow 52 | ``` 53 | User Question → Gradio UI → Backend → RAG System → Claude Sonnet 4.0 → Cypher Query → MCP Server → PuppyGraph → Results 54 | ``` 55 | 56 | ### Key Classes 57 | - `PuppyGraphChatbot` (backend.py) - Main orchestrator 58 | - `TextToCypherRAG` (rag_system.py) - Handles NL→Cypher conversion using ChromaDB + embeddings 59 | - `PuppyGraphMCPServer` (mcp_server.py) - MCP tools for schema, query execution, validation 60 | 61 | ## Configuration 62 | 63 | ### Required Environment Variables 64 | - `ANTHROPIC_API_KEY` - Required for Claude Sonnet 4.0 integration 65 | - `PUPPYGRAPH_BOLT_URI` - Default: `bolt://localhost:7687` 66 | - `PUPPYGRAPH_HTTP_URI` - Default: `http://localhost:8081` 67 | - `PUPPYGRAPH_USERNAME` - Default: `puppygraph` 68 | - `PUPPYGRAPH_PASSWORD` - Default: `puppygraph123` 69 | 70 | ### Dependencies 71 | - **Core**: gradio, anthropic, mcp, neo4j, requests 72 | - **RAG**: sentence-transformers, chromadb, langchain 73 | - **Server**: uvicorn, fastapi, python-dotenv 74 | 75 | ## Development Notes 76 | 77 | ### Multi-Round Query Execution 78 | The system automatically breaks complex questions into multiple Cypher queries. Each round uses results from previous queries to generate more specific follow-ups. 
79 | 80 | ### RAG System Details 81 | - Uses `all-MiniLM-L6-v2` for embeddings by default 82 | - ChromaDB stores question/Cypher examples 83 | - Claude Sonnet 4.0 generates queries using retrieved examples as context 84 | - Examples can be added via UI or programmatically 85 | 86 | ### MCP Integration 87 | The MCP server provides these tools: 88 | - `execute_cypher` - Run Cypher queries with result formatting 89 | - `get_schema_info` - Retrieve graph schema with optional node samples 90 | - `validate_cypher` - Validate query syntax before execution 91 | 92 | ### Error Handling 93 | - Connection failures to PuppyGraph are handled gracefully 94 | - RAG system falls back to basic query generation if examples are unavailable 95 | - All components have comprehensive logging -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
162 | .idea/ 163 | apps/private -------------------------------------------------------------------------------- /apps/chatbot/test_query_limits.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Test script to verify query limit functionality 4 | """ 5 | 6 | import logging 7 | import sys 8 | from rag_system import TextToCypherRAG, QueryStep 9 | 10 | logging.basicConfig(level=logging.INFO) 11 | logger = logging.getLogger("test_query_limits") 12 | 13 | def test_query_limits(): 14 | """Test that the system properly handles query limits""" 15 | 16 | try: 17 | # Initialize RAG system 18 | rag_system = TextToCypherRAG() 19 | 20 | # Mock schema 21 | mock_schema = { 22 | "vertices": [{"label": "TestNode", "attributes": []}], 23 | "edges": [] 24 | } 25 | 26 | # Test with max_rounds = 3 (should force stop on 3rd round) 27 | max_rounds = 3 28 | question = "Test query with forced limits" 29 | previous_steps = [] 30 | 31 | # Simulate first round 32 | logger.info(f"=== Testing Round 1 (should continue) ===") 33 | cypher, explanation, should_stop, prompt, llm_response = rag_system.generate_next_query( 34 | question, mock_schema, previous_steps, max_rounds 35 | ) 36 | 37 | if should_stop: 38 | logger.error("❌ Round 1: System stopped unexpectedly") 39 | return False 40 | else: 41 | logger.info(f"✅ Round 1: Continuing as expected") 42 | logger.info(f"Generated query: {cypher}") 43 | 44 | # Add mock step for round 1 45 | step1 = QueryStep(1, "First query", cypher) 46 | step1.result = [{"test": "data1"}] 47 | previous_steps.append(step1) 48 | 49 | # Simulate second round 50 | logger.info(f"=== Testing Round 2 (should continue with warning) ===") 51 | cypher, explanation, should_stop, prompt, llm_response = rag_system.generate_next_query( 52 | question, mock_schema, previous_steps, max_rounds 53 | ) 54 | 55 | if should_stop: 56 | logger.error("❌ Round 2: System stopped unexpectedly") 57 | return False 58 | else: 
59 | logger.info(f"✅ Round 2: Continuing as expected") 60 | logger.info(f"Generated query: {cypher}") 61 | # Check if warning about final round is in prompt 62 | logger.info(f"Round 2 prompt content: {prompt}") 63 | if "only have" in prompt and "round left" in prompt: 64 | logger.info("✅ Round 2: Warning about approaching limit found in prompt") 65 | else: 66 | logger.warning("⚠️ Round 2: No warning about approaching limit found") 67 | 68 | # Add mock step for round 2 69 | step2 = QueryStep(2, "Second query", cypher) 70 | step2.result = [{"test": "data2"}] 71 | previous_steps.append(step2) 72 | 73 | # Simulate third round (should force stop) 74 | logger.info(f"=== Testing Round 3 (should force stop) ===") 75 | cypher, explanation, should_stop, prompt, llm_response = rag_system.generate_next_query( 76 | question, mock_schema, previous_steps, max_rounds 77 | ) 78 | 79 | logger.info(f"Round 3 prompt content: {prompt}") 80 | if "final round" in prompt and "must now STOP" in prompt: 81 | logger.info("✅ Round 3: Force stop message found in prompt") 82 | else: 83 | logger.warning("⚠️ Round 3: No force stop message found") 84 | 85 | if should_stop: 86 | logger.info(f"✅ Round 3: System stopped as expected") 87 | logger.info(f"Final answer: {explanation}") 88 | return True 89 | else: 90 | logger.error("❌ Round 3: System should have stopped but continued") 91 | return False 92 | 93 | except Exception as e: 94 | logger.error(f"❌ Test failed with exception: {e}") 95 | import traceback 96 | traceback.print_exc() 97 | return False 98 | 99 | if __name__ == "__main__": 100 | logger.info("🧪 Testing Query Limit functionality") 101 | logger.info("=" * 50) 102 | 103 | success = test_query_limits() 104 | 105 | logger.info("=" * 50) 106 | if success: 107 | logger.info("🎉 Query limit test PASSED!") 108 | sys.exit(0) 109 | else: 110 | logger.error("❌ Query limit test FAILED!") 111 | sys.exit(1) -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # PuppyGraph 2 | 3 | This repository contains the PuppyGraph client for Python as well 4 | as PuppyGraph's Agentic Graph RAG libraries. 5 | 6 | ## Key Features 7 | 8 | - **Zero ETL**: Query data directly from your lakes and databases without data duplication. 9 | - **Dynamic Schema Management**: Modify graph schemas on the fly, without needing to rebuild databases. 10 | - **Petabyte Scalability**: Auto-sharded, distributed computation for handling vast datasets. 11 | - **Support for Cypher and Gremlin**: Interoperable query support with robust performance. 12 | - **AI-Native**: Ideal for Graph-RAG applications, with ultra-fast response times. 13 | 14 | ## Installation 15 | 16 | You can install the latest version via pip: 17 | 18 | ```bash 19 | pip install puppygraph 20 | ``` 21 | 22 | ## Quick Example 23 | 24 | ### Setup the client 25 | 26 | ```python 27 | from puppygraph import PuppyGraphClient, PuppyGraphHostConfig 28 | client = PuppyGraphClient(PuppyGraphHostConfig("localhost")) 29 | ``` 30 | 31 | ### Query the graph 32 | ```python 33 | # Cypher Query 34 | client.cypher_query("MATCH (actor:Actor)-[:ACTED_IN]->(movie:Movie) WHERE actor.name = \"Tom Hanks\" 35 | "RETURN movie.title") 36 | 37 | # Gremlin Query 38 | client.gremlin_query("g.V().hasLabel('person').has('name', 'Tom Hanks').out('ACTED_IN').values('title')") 39 | ``` 40 | 41 | ### Set the schema 42 | ```python 43 | # A sample schema for the IMDb dataset 44 | client.set_schema( 45 | { 46 | "catalogs": [ 47 | { 48 | "name": "imdb_catalog", 49 | "type": "DELTALAKE", 50 | "params": { 51 | "metastore_param": { 52 | "token": "your_token", 53 | "host": "your_host", 54 | "unity_catalog_name": "imdb_catalog", 55 | }, 56 | "storage_param": { 57 | "use_instance_profile": "false", 58 | "region": "us-west-2", 59 | "access_key": "your_access_key", 60 | "secret_key": "your_secret_key", 61 | "enable_ssl": "true", 62 | "type": "S3", 63 | 
}, 64 | }, 65 | } 66 | ], 67 | "vertices": [ 68 | { 69 | "table_source": { 70 | "catalog_name": "imdb_catalog", 71 | "schema_name": "public", 72 | "table_name": "movies", 73 | }, 74 | "label": "Movie", 75 | "description": "A movie in the IMDb database", 76 | "attributes": [ 77 | { 78 | "name": "title", 79 | "from_field": "title", 80 | "type": "String", 81 | "description": "The title of the movie", 82 | }, 83 | { 84 | "name": "release_year", 85 | "from_field": "release_year", 86 | "type": "Integer", 87 | "description": "The year the movie was released", 88 | }, 89 | ], 90 | "id": [ 91 | {"name": "movie_id", "from_field": "movie_id", "type": "String"} 92 | ], 93 | }, 94 | { 95 | "table_source": { 96 | "catalog_name": "imdb_catalog", 97 | "schema_name": "public", 98 | "table_name": "actors", 99 | }, 100 | "label": "Actor", 101 | "description": "An actor who starred in movies", 102 | "attributes": [ 103 | { 104 | "name": "name", 105 | "from_field": "name", 106 | "type": "String", 107 | "description": "The name of the actor", 108 | } 109 | ], 110 | "id": [ 111 | {"name": "actor_id", "from_field": "actor_id", "type": "String"} 112 | ], 113 | }, 114 | ], 115 | "edges": [ 116 | { 117 | "table_source": { 118 | "catalog_name": "imdb_catalog", 119 | "schema_name": "public", 120 | "table_name": "acted_in", 121 | }, 122 | "label": "ACTED_IN", 123 | "from_label": "Actor", 124 | "to_label": "Movie", 125 | "description": "An actor acted in a movie", 126 | "attributes": [], 127 | "id": [ 128 | { 129 | "name": "acted_in_id", 130 | "from_field": "acted_in_id", 131 | "type": "String", 132 | } 133 | ], 134 | "from_id": [ 135 | {"name": "actor_id", "from_field": "actor_id", "type": "String"} 136 | ], 137 | "to_id": [ 138 | {"name": "movie_id", "from_field": "movie_id", "type": "String"} 139 | ], 140 | } 141 | ], 142 | } 143 | ) 144 | 145 | ``` 146 | 147 | ## About PuppyGraph 148 | 149 | [PuppyGraph](https://www.puppygraph.com) 150 | is a zero-ETL graph analytics engine enabling seamless 
graph querying across one or multiple data sources. 151 | Unlike traditional graph databases, PuppyGraph connects directly to your data warehouses and lakes without requiring complex ETL pipelines, making it both cost-efficient and scalable. 152 | -------------------------------------------------------------------------------- /apps/chatbot/test_integration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | import time 6 | import logging 7 | from typing import Dict, Any 8 | 9 | # Setup logging 10 | logging.basicConfig(level=logging.INFO) 11 | logger = logging.getLogger("integration_test") 12 | 13 | def test_imports(): 14 | """Test that all required modules can be imported""" 15 | logger.info("Testing imports...") 16 | 17 | try: 18 | import gradio as gr 19 | logger.info("✅ Gradio imported successfully") 20 | except ImportError as e: 21 | logger.error(f"❌ Failed to import Gradio: {e}") 22 | return False 23 | 24 | try: 25 | from backend import PuppyGraphChatbot 26 | logger.info("✅ Backend imported successfully") 27 | except ImportError as e: 28 | logger.error(f"❌ Failed to import backend: {e}") 29 | return False 30 | 31 | try: 32 | from rag_system import TextToCypherRAG 33 | logger.info("✅ RAG system imported successfully") 34 | except ImportError as e: 35 | logger.error(f"❌ Failed to import RAG system: {e}") 36 | return False 37 | 38 | try: 39 | import mcp_server 40 | logger.info("✅ MCP server imported successfully") 41 | except ImportError as e: 42 | logger.error(f"❌ Failed to import MCP server: {e}") 43 | return False 44 | 45 | return True 46 | 47 | def test_rag_system(): 48 | """Test RAG system functionality""" 49 | logger.info("Testing RAG system...") 50 | 51 | try: 52 | from rag_system import TextToCypherRAG, QueryExample 53 | 54 | # Initialize RAG system (will use lightweight model for testing) 55 | rag = TextToCypherRAG() 56 | logger.info("✅ RAG system 
initialized") 57 | 58 | # Test adding an example 59 | example = QueryExample( 60 | question="Count all nodes", 61 | cypher="MATCH (n) RETURN count(n)", 62 | description="Counts all nodes in the graph" 63 | ) 64 | rag.add_example(example) 65 | logger.info("✅ Example added to RAG system") 66 | 67 | # Test finding similar examples 68 | similar = rag.find_similar_examples("How many nodes are there?") 69 | if similar: 70 | logger.info(f"✅ Found {len(similar)} similar examples") 71 | return True 72 | else: 73 | logger.warning("⚠️ No similar examples found (may be expected)") 74 | return True 75 | 76 | except Exception as e: 77 | logger.error(f"❌ RAG system test failed: {e}") 78 | return False 79 | 80 | def test_backend_basic(): 81 | """Test basic backend functionality""" 82 | logger.info("Testing backend basic functionality...") 83 | 84 | try: 85 | from backend import PuppyGraphChatbot 86 | 87 | # Initialize chatbot (this will fail if PuppyGraph is not running) 88 | try: 89 | chatbot = PuppyGraphChatbot() 90 | logger.info("✅ Backend initialized successfully") 91 | 92 | # Test schema retrieval 93 | schema = chatbot.get_schema() 94 | logger.info(f"✅ Schema retrieved: {len(schema.get('vertices', []))} vertices, {len(schema.get('edges', []))} edges") 95 | 96 | # Test stats (this might fail if no connection to PuppyGraph) 97 | stats = chatbot.get_graph_stats() 98 | if "error" in stats: 99 | logger.warning(f"⚠️ Graph stats returned error (PuppyGraph may not be running): {stats['error']}") 100 | else: 101 | logger.info(f"✅ Graph stats: {stats.get('node_count', 'unknown')} nodes, {stats.get('edge_count', 'unknown')} edges") 102 | 103 | return True 104 | 105 | except Exception as e: 106 | logger.warning(f"⚠️ Backend connection failed (PuppyGraph may not be running): {e}") 107 | return True # This is expected if PuppyGraph is not running 108 | 109 | except Exception as e: 110 | logger.error(f"❌ Backend test failed: {e}") 111 | return False 112 | 113 | def test_gradio_interface(): 
114 | """Test Gradio interface creation""" 115 | logger.info("Testing Gradio interface...") 116 | 117 | try: 118 | from gradio_app import create_interface 119 | 120 | # Create interface (don't launch) 121 | interface = create_interface() 122 | logger.info("✅ Gradio interface created successfully") 123 | return True 124 | 125 | except Exception as e: 126 | logger.error(f"❌ Gradio interface test failed: {e}") 127 | return False 128 | 129 | def test_environment_setup(): 130 | """Test environment and configuration""" 131 | logger.info("Testing environment setup...") 132 | 133 | # Check for .env file 134 | if os.path.exists('.env'): 135 | logger.info("✅ .env file found") 136 | else: 137 | logger.warning("⚠️ .env file not found (using .env.example)") 138 | 139 | # Check for Anthropic API key 140 | anthropic_key = os.getenv('ANTHROPIC_API_KEY') 141 | if anthropic_key: 142 | logger.info("✅ Anthropic API key configured") 143 | else: 144 | logger.warning("⚠️ Anthropic API key not found - RAG functionality may be limited") 145 | 146 | return True 147 | 148 | def run_all_tests(): 149 | """Run all integration tests""" 150 | logger.info("Starting PuppyGraph RAG Chatbot Integration Tests") 151 | logger.info("=" * 60) 152 | 153 | tests = [ 154 | ("Environment Setup", test_environment_setup), 155 | ("Module Imports", test_imports), 156 | ("RAG System", test_rag_system), 157 | ("Backend Basic", test_backend_basic), 158 | ("Gradio Interface", test_gradio_interface), 159 | ] 160 | 161 | results = {} 162 | 163 | for test_name, test_func in tests: 164 | logger.info(f"\n🧪 Running {test_name} test...") 165 | try: 166 | results[test_name] = test_func() 167 | except Exception as e: 168 | logger.error(f"❌ {test_name} test crashed: {e}") 169 | results[test_name] = False 170 | 171 | # Summary 172 | logger.info("\n" + "=" * 60) 173 | logger.info("TEST RESULTS SUMMARY") 174 | logger.info("=" * 60) 175 | 176 | passed = 0 177 | total = len(results) 178 | 179 | for test_name, result in 
results.items(): 180 | status = "✅ PASS" if result else "❌ FAIL" 181 | logger.info(f"{test_name:.<40} {status}") 182 | if result: 183 | passed += 1 184 | 185 | logger.info(f"\nOverall: {passed}/{total} tests passed") 186 | 187 | if passed == total: 188 | logger.info("🎉 All tests passed! The system is ready to use.") 189 | return True 190 | else: 191 | logger.warning("⚠️ Some tests failed. Check the logs above for details.") 192 | return False 193 | 194 | def main(): 195 | """Main test runner""" 196 | success = run_all_tests() 197 | sys.exit(0 if success else 1) 198 | 199 | if __name__ == "__main__": 200 | main() -------------------------------------------------------------------------------- /puppygraph/common/conversion_utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for conversion.""" 2 | 3 | from typing import Any, Dict, List, Union 4 | 5 | import dacite 6 | from puppygraph.common.dataclass_utils import dataclass_to_camel_dict 7 | from puppygraph.data.mapping.catalog_config import CatalogConfig, CatalogType 8 | from puppygraph.data.mapping.database_params import ( 9 | DatabaseParams, 10 | ElasticSearchParam, 11 | JDBCParam, 12 | ) 13 | from puppygraph.data.mapping.datalake_params import ( 14 | DatalakeParams, 15 | MetastoreParam, 16 | S3StorageParam, 17 | UnityMetastoreParam, 18 | ) 19 | from puppygraph.data.mapping.graph_element_cache_config import ( 20 | GraphElementCacheConfig, 21 | ) 22 | from puppygraph.data.mapping.graph_mapping_config import ( 23 | GraphElementConfig, 24 | MappedField, 25 | PuppyGraphMappingConfig, 26 | TableSource, 27 | ) 28 | 29 | 30 | def convert_mapping_config_to_host_json( 31 | config: Union[PuppyGraphMappingConfig, Dict], 32 | ) -> Dict[str, Any]: 33 | """Converts the PuppyGraph Mapping config to the host schema JSON. 34 | 35 | Args: 36 | config: The PuppyGraph Mapping config. 37 | 38 | Returns: 39 | The host schema JSON. 
40 | """ 41 | 42 | def _metastore_param_to_json(metastore_param: MetastoreParam) -> Dict[str, Any]: 43 | json = {} 44 | if isinstance(metastore_param, UnityMetastoreParam): 45 | json = dataclass_to_camel_dict(metastore_param) 46 | json["type"] = "unity" 47 | # Rename unityCatalogName to databricksCatalogName 48 | if "unityCatalogName" in json: 49 | json["databricksCatalogName"] = json.pop("unityCatalogName") 50 | return json 51 | 52 | def _storage_param_to_json(storage_param: S3StorageParam) -> Dict[str, Any]: 53 | json = {} 54 | if isinstance(storage_param, S3StorageParam): 55 | json = dataclass_to_camel_dict(storage_param) 56 | json["type"] = "S3" 57 | return json 58 | 59 | def _datalake_params_to_json(datalake_params: DatalakeParams) -> Dict[str, Any]: 60 | json = {} 61 | if datalake_params.metastore_param is not None: 62 | json["metastore"] = _metastore_param_to_json( 63 | datalake_params.metastore_param 64 | ) 65 | if datalake_params.storage_param is not None: 66 | json["storage"] = _storage_param_to_json(datalake_params.storage_param) 67 | return json 68 | 69 | def _database_params_to_json(database_params: DatabaseParams) -> Dict[str, Any]: 70 | key = "" 71 | if isinstance(database_params, JDBCParam): 72 | key = "jdbc" 73 | elif isinstance(database_params, ElasticSearchParam): 74 | key = "elasticSearch" 75 | return {key: dataclass_to_camel_dict(database_params)} 76 | 77 | def _catalog_config_to_json(catalog_config: CatalogConfig) -> Dict[str, Any]: 78 | """Maps the catalog config to the host catalog JSON. 79 | 80 | Args: 81 | catalog_config: The catalog configuration to map. 82 | 83 | Returns: 84 | The host catalog JSON. 
85 | """ 86 | catalog_json = { 87 | "name": catalog_config.name, 88 | "type": catalog_config.type.name.lower(), 89 | } 90 | 91 | if isinstance(catalog_config.params, DatalakeParams): 92 | catalog_json.update(_datalake_params_to_json(catalog_config.params)) 93 | elif isinstance(catalog_config.params, DatabaseParams): 94 | catalog_json.update(_database_params_to_json(catalog_config.params)) 95 | 96 | return catalog_json 97 | 98 | def _table_source_to_json(table_source: TableSource) -> Dict[str, Any]: 99 | return { 100 | "catalog": table_source.catalog_name, 101 | "schema": table_source.schema_name, 102 | "table": table_source.table_name, 103 | } 104 | 105 | def _mapped_id_to_json(mapped_id: List[MappedField]) -> Dict[str, Any]: 106 | return { 107 | "fields": [ 108 | { 109 | "field": mapped_field.from_field, 110 | "alias": mapped_field.name, 111 | "type": mapped_field.type, 112 | } 113 | for mapped_field in mapped_id 114 | ] 115 | } 116 | 117 | def _attribute_to_json(mapped_attribute: MappedField) -> Dict[str, Any]: 118 | return { 119 | "alias": mapped_attribute.name, 120 | "field": mapped_attribute.from_field, 121 | "type": mapped_attribute.type, 122 | } 123 | 124 | def _cache_config_to_json(cache_config: GraphElementCacheConfig) -> Dict[str, Any]: 125 | return dataclass_to_camel_dict(cache_config) 126 | 127 | def _vertex_config_to_json(vertex_config: GraphElementConfig) -> Dict[str, Any]: 128 | json = { 129 | "label": vertex_config.label, 130 | "oneToOne": { 131 | "tableSource": _table_source_to_json( 132 | table_source=vertex_config.table_source 133 | ), 134 | "id": _mapped_id_to_json(mapped_id=vertex_config.id), 135 | "attributes": [ 136 | _attribute_to_json( 137 | mapped_attribute=attribute, 138 | ) 139 | for attribute in vertex_config.attributes 140 | ], 141 | }, 142 | } 143 | if vertex_config.cache_config is not None: 144 | json["cache"] = _cache_config_to_json(vertex_config.cache_config) 145 | return json 146 | 147 | def _edge_config_to_json( 148 | 
edge_config: GraphElementConfig, 149 | ) -> Dict[str, Any]: 150 | 151 | json = { 152 | "label": edge_config.label, 153 | "fromVertex": edge_config.from_label, 154 | "toVertex": edge_config.to_label, 155 | "tableSource": _table_source_to_json(table_source=edge_config.table_source), 156 | "id": _mapped_id_to_json(mapped_id=edge_config.id), 157 | "fromId": _mapped_id_to_json(mapped_id=edge_config.from_id), 158 | "toId": _mapped_id_to_json(mapped_id=edge_config.to_id), 159 | "attributes": [ 160 | _attribute_to_json( 161 | mapped_attribute=attribute, 162 | ) 163 | for attribute in edge_config.attributes 164 | ], 165 | } 166 | 167 | if edge_config.cache_config is not None: 168 | json["cache"] = _cache_config_to_json(edge_config.cache_config) 169 | return json 170 | 171 | if isinstance(config, dict): 172 | config = dacite.from_dict( 173 | data_class=PuppyGraphMappingConfig, 174 | data=config, 175 | config=dacite.Config( 176 | type_hooks={ 177 | CatalogType: lambda x: CatalogType[x.upper()], 178 | } 179 | ), 180 | ) 181 | 182 | return { 183 | "catalogs": [ 184 | _catalog_config_to_json(catalog_config) 185 | for catalog_config in config.catalogs 186 | ], 187 | "graph": { 188 | "vertices": [ 189 | _vertex_config_to_json(vertex_config=vertex_config) 190 | for vertex_config in config.vertices 191 | ], 192 | "edges": [ 193 | _edge_config_to_json(edge_config=edge_config) 194 | for edge_config in config.edges 195 | ], 196 | }, 197 | } 198 | -------------------------------------------------------------------------------- /puppygraph/rag/graph_agent.py: -------------------------------------------------------------------------------- 1 | """PuppyGraphAgent.""" 2 | 3 | import logging 4 | from copy import deepcopy 5 | from typing import Iterable, List, Optional 6 | 7 | from langchain_core.language_models.chat_models import BaseChatModel 8 | from langchain_core.messages import ( 9 | AIMessage, 10 | BaseMessage, 11 | HumanMessage, 12 | ToolCall, 13 | ToolMessage, 14 | ) 15 | from 
langchain_core.prompts.chat import ChatPromptTemplate 16 | from langchain_core.pydantic_v1 import Field, create_model 17 | from langchain_core.runnables import RunnableSequence 18 | from langchain_core.tools import StructuredTool, Tool 19 | 20 | from puppygraph.client.client import PuppyGraphClient 21 | 22 | logger = logging.getLogger(__name__) 23 | logger.addHandler( 24 | logging.NullHandler() 25 | ) # Prevent "No handlers could be found" warnings 26 | 27 | 28 | class PuppyGraphAgent: 29 | """PuppyGraphAgent is the agent that interacts with the PuppyGraph via natural language. 30 | 31 | It enables the user to interact with the graph via natural language queries. 32 | """ 33 | 34 | def __init__( 35 | self, 36 | puppy_graph_client: PuppyGraphClient, 37 | llm: BaseChatModel, 38 | chat_prompt_template: ChatPromptTemplate, 39 | query_language: str = "cypher", 40 | additional_tools: Optional[List[Tool]] = None, 41 | ): 42 | """Initializes the PuppyGraphAgent. 43 | 44 | Args: 45 | puppy_graph_client: The PuppyGraph client 46 | llm: The language model 47 | chat_prompt_template: The chat prompt template 48 | query_language: The query language to use, either "cypher" or "gremlin" or "both" 49 | additional_tools: Additional tools for the agent to use 50 | """ 51 | 52 | self._puppy_graph_client = puppy_graph_client 53 | 54 | # Set up tools 55 | self._cypher_query_tool = _get_cypher_query_tool( 56 | puppy_graph_client=puppy_graph_client 57 | ) 58 | self._gremlin_query_tool = _get_gremlin_query_tool( 59 | puppy_graph_client=puppy_graph_client 60 | ) 61 | if query_language == "cypher": 62 | self._tools = [self._cypher_query_tool] 63 | elif query_language == "gremlin": 64 | self._tools = [self._gremlin_query_tool] 65 | elif query_language == "both": 66 | self._tools = [self._cypher_query_tool, self._gremlin_query_tool] 67 | 68 | if additional_tools is not None: 69 | self._tools.extend(additional_tools) 70 | 71 | self._tool_dict = {tool.name: tool for tool in self._tools} 72 | 73 
| # Set up llm chain 74 | self._chat_prompt_template = chat_prompt_template 75 | 76 | self._llm = llm.bind_tools(tools=self._tools) 77 | self._llm_chain: RunnableSequence = self._chat_prompt_template | self._llm 78 | 79 | self._llm_no_tool_output = llm.bind_tools(tools=[]) 80 | self._llm_no_tool_output_chain: RunnableSequence = ( 81 | self._chat_prompt_template | self._llm_no_tool_output 82 | ) 83 | 84 | # Set up other global variables 85 | self._message_history = [] 86 | 87 | def query(self, user_input: str, max_iters: int = 10) -> Iterable[BaseMessage]: 88 | """Query the graph using the given user input. 89 | 90 | Args: 91 | user_input: The user input 92 | max_iters: The maximum number of iterations to run 93 | 94 | Yields: 95 | BaseMessage, can be either AIMessage or ToolMessage 96 | """ 97 | # We have to copy the message history to avoid side effects 98 | # if query() is called multiple times 99 | message_history = deepcopy(self._message_history) 100 | 101 | new_messages = [HumanMessage(content=user_input)] 102 | 103 | iters = 0 104 | 105 | wait_for_user_input = False 106 | while iters < max_iters and not wait_for_user_input: 107 | # If we are at the last iteration, we don't want to call the tool 108 | # This is because we want to show the user the final message 109 | tool_call_allowed = True 110 | if iters + 1 == max_iters: 111 | tool_call_allowed = False 112 | 113 | # Predict 114 | ai_message = self._llm_predict( 115 | message_history=message_history + new_messages, 116 | tool_call_allowed=tool_call_allowed, 117 | ) 118 | 119 | # Add AI message to new messages 120 | new_messages.append(ai_message) 121 | logger.info("AI message: %s", ai_message.content) 122 | yield ai_message 123 | 124 | if ai_message.tool_calls: 125 | # Execute the actual tool 126 | tool_messages = self._execute_tool_calls(ai_message) 127 | 128 | # Add tool messages to new messages 129 | new_messages.extend(tool_messages) 130 | logger.info("Tool messages: %s", tool_messages) 131 | yield from 
tool_messages 132 | 133 | iters += 1 134 | 135 | # Check if we need to wait for user input 136 | if not ai_message.tool_calls: 137 | wait_for_user_input = True 138 | 139 | # Update the message history 140 | self._message_history += new_messages 141 | 142 | def reset_messages(self): 143 | """Reset the message history.""" 144 | self._message_history = [] 145 | 146 | def _llm_predict( 147 | self, 148 | message_history: List[BaseMessage], 149 | tool_call_allowed: bool, 150 | ) -> AIMessage: 151 | """Predict the AI message using llm. 152 | 153 | Args: 154 | message_history: The message history 155 | tool_call_allowed: Whether tool calls are allowed 156 | 157 | Returns: 158 | The predicted AI message 159 | """ 160 | input_dict = { 161 | "message_history": message_history, 162 | } 163 | 164 | if not tool_call_allowed: 165 | return self._llm_no_tool_output_chain.invoke(input=input_dict) 166 | 167 | return self._llm_chain.invoke(input=input_dict) 168 | 169 | def _execute_tool_calls(self, ai_message: AIMessage) -> List[ToolMessage]: 170 | """Execute the tool calls in the AI message. 171 | 172 | Args: 173 | ai_message: The AI message which might contain tool calls 174 | 175 | Returns: 176 | The tool messages 177 | """ 178 | tool_messages = [] 179 | for tool_call in ai_message.tool_calls: 180 | tool_messages.append(self._execute_tool_call(tool_call)) 181 | return tool_messages 182 | 183 | def _execute_tool_call(self, tool_call: ToolCall) -> ToolMessage: 184 | """Execute the given tool call. 
185 | 186 | Args: 187 | tool_call: The tool call to execute 188 | 189 | Returns: 190 | The tool message 191 | """ 192 | tool = self._tool_dict[tool_call["name"]] 193 | logger.info( 194 | "Calling tool: %s with args: %s", tool_call["name"], tool_call["args"] 195 | ) 196 | try: 197 | tool_output = str(tool.invoke(input=tool_call["args"])) 198 | except Exception as e: 199 | tool_output = f"While executing tool, an error occurred : {e}" 200 | 201 | return ToolMessage(tool_output, tool_call_id=tool_call["id"]) 202 | 203 | 204 | def _get_cypher_query_tool(puppy_graph_client: PuppyGraphClient): 205 | """Get the Cypher query tool.""" 206 | return StructuredTool.from_function( 207 | func=puppy_graph_client.cypher_query, 208 | name="query_graph_cypher", 209 | description="Query the graph database using Cypher.", 210 | args_schema=create_model( 211 | "", query=(str, Field(description="The Cypher query to run")) 212 | ), 213 | ) 214 | 215 | 216 | def _get_gremlin_query_tool(puppy_graph_client: PuppyGraphClient): 217 | """Get the Gremlin query tool.""" 218 | return StructuredTool.from_function( 219 | func=puppy_graph_client.gremlin_query, 220 | name="query_graph_gremlin", 221 | description="Query the graph database using Gremlin.", 222 | args_schema=create_model( 223 | "", query=(str, Field(description="The Gremlin query to run")) 224 | ), 225 | ) 226 | -------------------------------------------------------------------------------- /apps/chatbot/README.md: -------------------------------------------------------------------------------- 1 | # PuppyGraph RAG Chatbot Demo 2 | 3 | A conversational AI interface for PuppyGraph that converts natural language questions into Cypher queries using Retrieval-Augmented Generation (RAG). 
4 | 5 | ## Features 6 | 7 | - 🤖 **Natural Language to Cypher**: Ask questions in plain English, get intelligent query execution 8 | - 🔄 **Multi-Round Execution**: Automatically generates and executes multiple queries as needed 9 | - ⚡ **Real-time Streaming**: Watch each query step execute live as it happens 10 | - 🧠 **RAG-Powered**: Uses embeddings and similar examples to improve query generation 11 | - 🔌 **MCP Integration**: Custom Model Context Protocol server for PuppyGraph 12 | - 🧭 **Claude Sonnet 4.0**: Powered by Anthropic's latest language model with intelligent stopping 13 | - 📊 **Graph Exploration**: Built-in schema viewer and statistics 14 | - 🎯 **Interactive UI**: Clean Gradio interface with real-time updates 15 | - 📚 **Learning System**: Add your own examples to improve performance 16 | 17 | ## Architecture 18 | 19 | ``` 20 | ┌─────────────────┐ ┌──────────────────┐ ┌─────────────────┐ 21 | │ │ │ │ │ │ 22 | │ Gradio UI │◄──►│ Python Backend │◄──►│ PuppyGraph │ 23 | │ │ │ │ │ │ 24 | └─────────────────┘ └──────────────────┘ └─────────────────┘ 25 | │ 26 | ▼ 27 | ┌──────────────────┐ 28 | │ │ 29 | │ MCP Server │ 30 | │ │ 31 | └──────────────────┘ 32 | │ 33 | ▼ 34 | ┌──────────────────┐ 35 | │ │ 36 | │ RAG System │ 37 | │ (ChromaDB + │ 38 | │ Embeddings + │ 39 | │ Claude Sonnet) │ 40 | └──────────────────┘ 41 | ``` 42 | 43 | ## Installation 44 | 45 | 1. Create a virtual environment and activate it: 46 | ```bash 47 | python -m venv venv 48 | source venv/bin/activate 49 | ``` 50 | 51 | 2. **Install dependencies:** 52 | ```bash 53 | pip install -r requirements.txt 54 | ``` 55 | 56 | 3. **Set up environment variables:** 57 | ```bash 58 | cp .env.example .env 59 | # Edit .env with your Anthropic API key and PuppyGraph settings 60 | ``` 61 | 62 | 4. 
**Ensure PuppyGraph is running:** 63 | - Bolt protocol on port 7687 64 | - HTTP API on port 8081 65 | - Default credentials: puppygraph/puppygraph123 66 | 67 | ## Usage 68 | 69 | ### Quick Start 70 | 71 | ```bash 72 | python gradio_app.py 73 | ``` 74 | 75 | Then open http://localhost:7860 in your browser. 76 | 77 | ### Components 78 | 79 | #### 1. MCP Server (`mcp_server.py`) 80 | Standalone Model Context Protocol server that provides: 81 | - Cypher query execution 82 | - Schema introspection 83 | - Query validation 84 | - Graph statistics 85 | 86 | Run standalone: 87 | ```bash 88 | python mcp_server.py 89 | ``` 90 | 91 | #### 2. RAG System (`rag_system.py`) 92 | Handles text-to-Cypher conversion using: 93 | - Sentence embeddings for question similarity 94 | - ChromaDB for example storage 95 | - Claude Sonnet 4.0 for query generation 96 | - Confidence scoring 97 | 98 | #### 3. Backend (`backend.py`) 99 | Coordinates all components: 100 | - Manages MCP server process 101 | - Integrates RAG system 102 | - Handles conversation history 103 | - Provides unified API 104 | 105 | #### 4. Gradio UI (`gradio_app.py`) 106 | Interactive web interface with: 107 | - Chat interface for questions 108 | - Schema and statistics viewer 109 | - Example management system 110 | - Help documentation 111 | 112 | ## Example Queries 113 | 114 | ### Simple Queries (typically 1 round): 115 | - "Show me all nodes in the graph" 116 | - "Count the total number of relationships" 117 | - "What types of nodes exist?" 118 | - "Show me the graph schema" 119 | 120 | ### Complex Queries (typically 2-3 rounds): 121 | - "Which users have the most connections and what do they connect to?" 122 | - "What percentage of nodes have more than 5 relationships?" 123 | - "Find nodes that are connected to both X and Y type nodes" 124 | - "Show me the top 5 most connected entities and their relationship types" 125 | - "How many different paths exist between node A and node B?" 
126 | 127 | ## Adding Custom Examples 128 | 129 | Use the "Add Examples" tab to teach the system new patterns: 130 | 131 | 1. **Question**: "Find users who bought expensive products" 132 | 2. **Cypher**: `MATCH (u:User)-[:BOUGHT]->(p:Product) WHERE p.price > 100 RETURN u, p` 133 | 3. **Description**: "Finds users who purchased products over $100" 134 | 135 | ## Configuration 136 | 137 | ### Environment Variables 138 | 139 | - `ANTHROPIC_API_KEY`: Required for Claude Sonnet 4.0 integration 140 | - `PUPPYGRAPH_BOLT_URI`: PuppyGraph Bolt endpoint (default: bolt://localhost:7687) 141 | - `PUPPYGRAPH_HTTP_URI`: PuppyGraph HTTP API (default: http://localhost:8081) 142 | - `PUPPYGRAPH_USERNAME`: Database username (default: puppygraph) 143 | - `PUPPYGRAPH_PASSWORD`: Database password (default: puppygraph123) 144 | 145 | ### Customization 146 | 147 | #### RAG System 148 | - **Embedding Model**: Change in `rag_system.py` (default: all-MiniLM-L6-v2) 149 | - **Vector Database**: ChromaDB configuration 150 | - **LLM Model**: Claude model selection (default: claude-sonnet-4-20250514) 151 | 152 | #### UI Customization 153 | - **Port**: Modify in `gradio_app.py` (default: 7860) 154 | - **Styling**: Update CSS in the interface creation 155 | - **Tabs**: Add/remove functionality tabs 156 | 157 | ## API Reference 158 | 159 | ### Backend Methods 160 | 161 | ```python 162 | from backend import PuppyGraphChatbot 163 | 164 | chatbot = PuppyGraphChatbot() 165 | 166 | # Process natural language query 167 | result = chatbot.process_natural_language_query("Show all nodes") 168 | 169 | # Add custom example 170 | chatbot.add_query_example( 171 | question="Find connected users", 172 | cypher="MATCH (u1:User)-[:FRIEND]->(u2:User) RETURN u1, u2", 173 | description="Shows friendship connections" 174 | ) 175 | 176 | # Get graph statistics 177 | stats = chatbot.get_graph_stats() 178 | ``` 179 | 180 | ### MCP Server Tools 181 | 182 | When running as MCP server, provides these tools: 183 | - 
`execute_cypher`: Run Cypher queries 184 | - `get_schema_info`: Get schema with optional samples 185 | - `validate_cypher`: Validate query syntax 186 | 187 | ## Troubleshooting 188 | 189 | ### Common Issues 190 | 191 | 1. **Connection Error**: Ensure PuppyGraph is running and accessible 192 | 2. **Anthropic API Error**: Check your API key and credits 193 | 3. **Import Errors**: Install all requirements with `pip install -r requirements.txt` 194 | 4. **MCP Server Issues**: Check logs for connection problems 195 | 196 | ### Logs 197 | 198 | Enable debug logging: 199 | ```python 200 | import logging 201 | logging.basicConfig(level=logging.DEBUG) 202 | ``` 203 | 204 | ### Testing Connection 205 | 206 | Test PuppyGraph connectivity: 207 | ```python 208 | from backend import PuppyGraphChatbot 209 | chatbot = PuppyGraphChatbot() 210 | print(chatbot.get_graph_stats()) 211 | ``` 212 | 213 | ## Development 214 | 215 | ### Project Structure 216 | 217 | ``` 218 | rag-demo/ 219 | ├── gradio_app.py # Main Gradio UI application 220 | ├── backend.py # Backend coordinator 221 | ├── mcp_server.py # MCP server implementation 222 | ├── rag_system.py # RAG/text-to-cypher system 223 | ├── requirements.txt # Python dependencies 224 | ├── .env.example # Environment variables template 225 | └── README.md # This file 226 | ``` 227 | 228 | ### Adding Features 229 | 230 | 1. **New Query Types**: Add examples to `rag_system.py` 231 | 2. **UI Components**: Extend tabs in `gradio_app.py` 232 | 3. **MCP Tools**: Add tools in `mcp_server.py` 233 | 4. 
**Backend Logic**: Extend `backend.py` 234 | 235 | ### Testing 236 | 237 | ```bash 238 | # Test MCP server 239 | python mcp_server.py 240 | 241 | # Test RAG system 242 | python -c "from rag_system import TextToCypherRAG; rag = TextToCypherRAG(); print('RAG system OK')" 243 | 244 | # Test backend 245 | python -c "from backend import get_chatbot; chatbot = get_chatbot(); print(chatbot.get_graph_stats())" 246 | ``` 247 | 248 | ## Contributing 249 | 250 | 1. Fork the repository 251 | 2. Create a feature branch 252 | 3. Make your changes 253 | 4. Add tests if applicable 254 | 5. Submit a pull request 255 | 256 | ## License 257 | 258 | This project is part of the PuppyGraph ecosystem. See the main repository for license information. 259 | 260 | ## Support 261 | 262 | For issues and questions: 263 | - Check the troubleshooting section 264 | - Review PuppyGraph documentation 265 | - Open an issue in the main repository -------------------------------------------------------------------------------- /apps/databricks_mining_site/run_agent.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | from typing import List 4 | 5 | import yaml 6 | from langchain_core.prompts.chat import ChatPromptTemplate, MessagesPlaceholder 7 | from langchain_openai import ChatOpenAI 8 | 9 | from puppygraph import PuppyGraphClient, PuppyGraphHostConfig 10 | from puppygraph.rag import PuppyGraphAgent 11 | from langchain_core.messages import BaseMessage 12 | from typing import Iterable, List 13 | 14 | logging.basicConfig(level=logging.INFO) 15 | 16 | 17 | def _get_graph_schema_prompt() -> str: 18 | return """ 19 | Nodes are the following: 20 | - failure_type: 21 | properties: 22 | - name: failure_type_name 23 | type: String 24 | description: The name of the failure type, only takes values from "Electrical", "Mechanical", "Software", "Pneumatic", "Hydraulic" 25 | - asset: 26 | description: The asset node represents a physical asset in 
the mining site 27 | properties: 28 | - name: asset_id 29 | type: String 30 | - name: asset_name 31 | type: String 32 | - name: asset_type 33 | type: String 34 | description: the type of an asset, only takes values from "Heavy Machinery", "Drilling", "Material Handling", "Transport", "Processing", "Safety" 35 | - name: location 36 | type: String 37 | - name: acquisition_date 38 | type: String 39 | description: the format is "YYYY-MM-DD" 40 | - name: status 41 | type: String 42 | description: The status of the asset, only takes values from "Active", "Inactive", "Under Maintenance" 43 | - work_order: 44 | description: The work order node represents a work order raised for an asset 45 | properties: 46 | - name: work_order_id 47 | type: String 48 | - name: date 49 | type: String 50 | - name: action_taken 51 | type: String 52 | - name: technician 53 | type: String 54 | - name: component_replaced_description 55 | type: String 56 | description: Description of the component replaced, only takes values from "Alternator", "Brake Assembly", "Control Panel", "Conveyor Belt", "Cooling Fan", "Engine Oil Filter", "Exhaust Manifold", "Fuel Injector", "Hydraulic Cylinder", "Hydraulic Hose", "Hydraulic Pump", "Pressure Sensor", "Swing Motor", "Track Chain", "Transmission". 57 | - name: component_replaced_material_num 58 | type: String 59 | - name: successful_fix 60 | type: Boolean 61 | description: Whether the work order successfully fixed the asset or not. 62 | Edges are the following: 63 | - can_have_failure: 64 | description: The potential failure mode of an asset 65 | from: asset 66 | to: failure_type 67 | properties: 68 | - name: steps_to_follow 69 | type: String 70 | description: The steps to follow to troubleshoot the failure. 
(remember, this property is on the EDGE, not the NODE) 71 | - name: reference_source 72 | type: String 73 | descritpion: The reference source of the troubleshooting steps, only takes values from "Documentum", "OEM", "Internal Manual" 74 | - name: recommended_actions 75 | type: String 76 | - worked_on: 77 | description: A work order is working on an asset 78 | from: work_order 79 | to: asset 80 | properties: NONE 81 | - related_to_failure: 82 | description: A work order identifies a failure type on a specific asset 83 | properties: NONE 84 | from: work_order 85 | to: failure_type 86 | The relationships are the following: 87 | (:asset)-[:can_have_failure]->(:failure_type), 88 | (:work_order)-[:worked_on]->(:asset), 89 | (:work_order)-[:related_to_failure]->(:failure_type) 90 | """ 91 | 92 | 93 | def _get_chat_prompt_template(graph_schema_prompt: str) -> ChatPromptTemplate: 94 | 95 | return ChatPromptTemplate.from_messages( 96 | [ 97 | ( 98 | "system", 99 | "You are a helpful assistant to help answer user questions about assets in a mining site." 100 | "You will need to use the information stored in the graph database to answer the user's questions." 
101 | "Here is some information about the graph database schema.\n" 102 | f"{graph_schema_prompt}", 103 | ), 104 | ( 105 | "system", 106 | "You must first output a PLAN, then you can use the PLAN to call the tools.\n" 107 | "Each STEP of the PLAN should be corresponding to one or more function calls (but not less), either simple or complex.\n" 108 | "Minimize the number of steps in the PLAN, but make sure the PLAN is workable.\n" 109 | "Remember, each step can be converted to a Cypher query, since Cypher query can handle quite complex queries," 110 | "each step can be complex as well as long as it can be converted to a Cypher query.", 111 | ), 112 | MessagesPlaceholder(variable_name="message_history"), 113 | ( 114 | "system", 115 | "For COUNT(), ONLY use COUNT(*) in your Cypher queries, as COUNT(something) is not supported yet.When calculating failures for a particular asset, also first find out the work orders that are related to the asset, then count the work orders that are related to the failure using related_to_failure. 
DO NOT USE can_have_failure for counting total number of failures, USE related_to_failure instead.", 116 | ), 117 | ( 118 | "system", 119 | "Always use the format {\n" 120 | "'THINKING': ," 121 | "'PLAN': ," 122 | "'CONCLUSION': }", 123 | ), 124 | ], 125 | template_format="jinja2", 126 | ) 127 | 128 | 129 | def _get_llm() -> ChatOpenAI: 130 | return ChatOpenAI( 131 | model="gpt-4o-2024-08-06", 132 | temperature=0, 133 | api_key=os.getenv("OPENAI_API_KEY"), 134 | ) 135 | 136 | 137 | def _get_puppy_graph_client(ip) -> PuppyGraphClient: 138 | return PuppyGraphClient(PuppyGraphHostConfig(ip=ip)) 139 | 140 | 141 | def _process_answer(answers: Iterable[BaseMessage]) -> str: 142 | reversed_answers = reversed(list(answers)) 143 | for answer in reversed_answers: 144 | text = answer.content 145 | try: 146 | text_dict = yaml.safe_load(text) 147 | if "CONCLUSION" in text_dict: 148 | return text_dict["CONCLUSION"] 149 | except: 150 | text_split = text.split("'CONCLUSION':") 151 | return text_split[-1].strip("\n}") 152 | return text 153 | 154 | 155 | def _run_queries(pg_agent: PuppyGraphAgent) -> List[str]: 156 | queries = [ 157 | "How do I check engine oil levels?", 158 | "How many mechanical failures has Excavator 3000 had?", 159 | "How many times did we need to replace pressure sensor on Haul Truck 400T?", 160 | "How many mechanical work orders were unsuccessful?", 161 | "When was a work order raised for Load-Haul-Dump Machine to update fuel injector?", 162 | "What are the Asset IDs of Heavy Machinery?", 163 | "How many assets are active in Site A?", 164 | "When should I replace Pressure Sensor?", 165 | "How many troubleshooting steps are from the Documentum?", 166 | "Which asset had the most work orders and how many of them?", 167 | "How do I safeguard against system errors in my Hydraulic Shovels?", 168 | "Was the transmission tested under load in WO008?", 169 | "Was the troubleshooting guide for Excavator 3000 followed for WO001 order?", 170 | "Where is Excavator 3000 
located?", 171 | "What are the previous failures type for Excavator 3000 from work order logs?", 172 | "Did we replace the Cooling fan on Crusher CR6000?", 173 | "What component have we replaced the most?", 174 | "How many Electrical failures has Crusher CR6000 had?", 175 | "How many Electrical failures have Heavy Machinery Had?", 176 | ] 177 | 178 | answers = [] 179 | for i, query in enumerate(queries): 180 | print(f"======{i}======") 181 | print(f"User: {query}") 182 | 183 | # We are doing single user query, not a conversation 184 | # so need to reset history for each turn 185 | pg_agent.reset_messages() 186 | answer = _process_answer(pg_agent.query(query)) 187 | answers.append(answer) 188 | 189 | print(f"System: {answer}") 190 | print(f"=====================") 191 | 192 | return answers 193 | 194 | 195 | def main(): 196 | """Main function to run the puppygraph agent. 197 | 198 | We first run a set of queries and then enter free chat mode. 199 | """ 200 | pg_agent = PuppyGraphAgent( 201 | puppy_graph_client=_get_puppy_graph_client("127.0.0.1"), 202 | llm=_get_llm(), 203 | chat_prompt_template=_get_chat_prompt_template( 204 | graph_schema_prompt=_get_graph_schema_prompt() 205 | ), 206 | ) 207 | 208 | _run_queries(pg_agent=pg_agent) 209 | 210 | print("\n=======Entering Free Chat Mode=======\n") 211 | pg_agent.reset_messages() 212 | while True: 213 | user_input = input("User: ") 214 | response = pg_agent.query(user_input=user_input) 215 | print(f"System: {_process_answer(response)}") 216 | 217 | 218 | if __name__ == "__main__": 219 | main() 220 | -------------------------------------------------------------------------------- /puppygraph/client/client.py: -------------------------------------------------------------------------------- 1 | """PuppyGraph client module.""" 2 | 3 | import logging 4 | import threading 5 | import time 6 | from typing import Any, Dict, List, Optional, Union 7 | 8 | import requests 9 | from gremlin_python.driver import client as 
GremlinClient 10 | from gremlin_python.driver.protocol import GremlinServerError 11 | from gremlin_python.driver.serializer import GraphBinarySerializersV1 12 | from neo4j import Driver as CypherDriver 13 | from neo4j import GraphDatabase, Query, Result 14 | from neo4j.exceptions import AuthError, CypherSyntaxError, ServiceUnavailable 15 | from neo4j.graph import Node, Relationship, Path 16 | from puppygraph.common.conversion_utils import convert_mapping_config_to_host_json 17 | from puppygraph.data.host.host_config import PuppyGraphHostConfig 18 | from puppygraph.data.mapping.graph_mapping_config import PuppyGraphMappingConfig 19 | 20 | logger = logging.getLogger(__name__) 21 | logger.addHandler( 22 | logging.NullHandler() 23 | ) # Prevent "No handlers could be found" warnings 24 | 25 | 26 | class PuppyGraphClient: 27 | """PuppyGraph client.""" 28 | 29 | def __init__(self, config: PuppyGraphHostConfig): 30 | """Initializes a PuppyGraph client.""" 31 | self._config = config 32 | self._cypher_query_driver = GraphDatabase.driver( 33 | f"bolt://{config.ip}:{config.cypher_port}/", 34 | max_connection_lifetime=config.cypher_max_connection_lifetime 35 | ) 36 | self._gremlin_query_client = GremlinClient.Client( 37 | f"ws://{config.ip}:{config.gremlin_port}/gremlin", 38 | traversal_source="g", 39 | username=config.username, 40 | password=config.password, 41 | message_serializer=GraphBinarySerializersV1(), 42 | ) 43 | 44 | def set_schema( 45 | self, mapping_config: Union[PuppyGraphMappingConfig, Dict] 46 | ) -> PuppyGraphMappingConfig: 47 | """Sets the graph mapping config in PuppyGraph server. 48 | 49 | Args: 50 | mapping_config: The graph mapping config to set, can be json or dataclass. 51 | 52 | Returns: 53 | The puppygraph graph mapping config that was set. 
54 | """ 55 | return _set_schema(host_config=self._config, mapping_config=mapping_config) 56 | 57 | def get_schema(self) -> str: 58 | """Returns the json schema of the PuppyGraph database.""" 59 | return _get_schema(config=self._config) 60 | 61 | def cypher_query( 62 | self, query: str, params: Optional[Dict[str, Any]] = None, timeout_ms=30000 63 | ) -> List[Dict[str, Any]]: 64 | """Executes a Cypher query on the puppy graph. 65 | 66 | Args: 67 | query: The Cypher query to execute. 68 | params: The parameters to pass to the Cypher query. 69 | timeout_ms: The timeout in milliseconds for the query, defaults to 30000. 70 | 71 | Returns: 72 | The result of the Cypher query in a list of dictionaries. 73 | """ 74 | return _run_cypher_query( 75 | cypher_driver=self._cypher_query_driver, 76 | query=query, 77 | params=params, 78 | timeout_ms=timeout_ms, 79 | ) 80 | 81 | def gremlin_query(self, query: str, timeout_ms=30000) -> List[Dict[str, Any]]: 82 | """Executes a Gremlin query on the puppy graph. 83 | 84 | Args: 85 | query: The Gremlin query to execute. 86 | timeout_ms: The timeout in milliseconds for the query, defaults to 30000. 87 | 88 | Returns: 89 | The result of the Gremlin query in a list of dictionaries. 90 | """ 91 | return _run_gremlin_query( 92 | gremlin_client=self._gremlin_query_client, 93 | query=query, 94 | timeout_ms=timeout_ms, 95 | ) 96 | 97 | 98 | def _set_schema( 99 | host_config: PuppyGraphHostConfig, 100 | mapping_config: Union[PuppyGraphMappingConfig, Dict], 101 | ) -> PuppyGraphMappingConfig: 102 | """Sets the graph schema in PuppyGraph server. 103 | 104 | Args: 105 | host_config: The host configuration of the PuppyGraph server. 106 | mapping_config: The graph mapping config that defines how the graph is constructed, can be json or dataclass. 107 | 108 | Returns: 109 | The puppygraph schema that was set. 
110 | """ 111 | 112 | schema_json = convert_mapping_config_to_host_json(config=mapping_config) 113 | logger.info( 114 | "Setting graph schema in PuppyGraph server...\n=============================\n%s\n=============================\n", 115 | schema_json, 116 | ) 117 | 118 | response = requests.post( 119 | f"http://{host_config.ip}:{host_config.http_port}/schema", 120 | auth=(host_config.username, host_config.password), 121 | json=schema_json, 122 | timeout=60, 123 | ) 124 | response.raise_for_status() 125 | logger.info("Successfully updated the schema in PuppyGraph!") 126 | 127 | # Check if PuppyGraph is ready to serve 128 | response = requests.get( 129 | f"http://{host_config.ip}:{host_config.http_port}/schemajson", 130 | auth=(host_config.username, host_config.password), 131 | timeout=60, 132 | ) 133 | while response.status_code != 200: 134 | time.sleep(10) 135 | logger.info("PuppyGraph is ready to serve the new schema!") 136 | return mapping_config 137 | 138 | 139 | def _get_schema(config: PuppyGraphHostConfig) -> str: 140 | """Returns the schema of the PuppyGraph database. 141 | 142 | Args: 143 | config: The PuppyGraph host configuration. 144 | 145 | Returns: 146 | The schema of the PuppyGraph database in string format. 
147 | """ 148 | response = requests.get( 149 | f"http://{config.ip}:{config.http_port}/schemajson", 150 | auth=(config.username, config.password), 151 | timeout=60, 152 | ) 153 | response.raise_for_status() 154 | return response.text 155 | 156 | 157 | class _QueryThread(threading.Thread): 158 | def __init__(self, target, *args, **kwargs): 159 | super().__init__() 160 | self._target = target 161 | self._args = args 162 | self._kwargs = kwargs 163 | self._result = None 164 | self._error = None 165 | 166 | def run(self): 167 | try: 168 | self._result = self._target(*self._args, **self._kwargs) 169 | except Exception as e: 170 | self._error = e 171 | 172 | def get_result(self): 173 | if self._error: 174 | raise self._error 175 | return self._result 176 | 177 | 178 | def _run_with_threading_timeout(fn, timeout, *args, **kwargs): 179 | thread = _QueryThread(fn, *args, **kwargs) 180 | thread.start() 181 | thread.join(timeout / 1e3) # Convert ms to seconds 182 | 183 | if thread.is_alive(): 184 | raise TimeoutError(f"Operation timed out after {timeout} ms!") 185 | return thread.get_result() 186 | 187 | 188 | def _cypher_query_fn( 189 | cypher_driver: CypherDriver, 190 | query: str, 191 | params: Optional[Dict[str, Any]], 192 | timeout_s: float, 193 | ): 194 | with cypher_driver.session() as session: 195 | neo4j_query = Query(text=query, timeout=timeout_s) 196 | try: 197 | res: Result = session.run(neo4j_query, params) 198 | json_data: List[Dict[str, Any]] = [_unpack_value(record) for record in res] 199 | return json_data 200 | except CypherSyntaxError as e: 201 | raise ValueError(f"`{query}` is not valid:\n{e}") from e 202 | except (AuthError, ServiceUnavailable, TimeoutError) as e: 203 | raise e 204 | 205 | def _unpack_value(value): 206 | if isinstance(value, Node): 207 | return { 208 | "type": "Node", 209 | "id": value.element_id, 210 | "labels": list(value.labels), 211 | "properties": dict(value.items()) 212 | } 213 | elif isinstance(value, Relationship): 214 | return { 
215 | "type": "Relationship", 216 | "id": value.element_id, 217 | "type_name": value.type, 218 | "start_id": value.start_node.element_id, 219 | "end_id": value.end_node.element_id, 220 | "properties": dict(value.items()) 221 | } 222 | elif isinstance(value, Path): 223 | return { 224 | "type": "Path", 225 | "nodes": [_unpack_value(n) for n in value.nodes], 226 | "relationships": [_unpack_value(r) for r in value.relationships] 227 | } 228 | elif isinstance(value, list): 229 | return [_unpack_value(v) for v in value] 230 | elif isinstance(value, dict): 231 | return {k: _unpack_value(v) for k, v in value.items()} 232 | else: 233 | return value # fall back to raw value (int, str, etc.) 234 | 235 | 236 | def _gremlin_query_fn(gremlin_client: GremlinClient.Client, query: str): 237 | try: 238 | result_set = gremlin_client.submit(query) 239 | results = result_set.all().result() 240 | return results 241 | except GremlinServerError as e: 242 | raise ValueError(f"Gremlin query error: {e}") from e 243 | except TimeoutError as e: 244 | raise TimeoutError(f"Timeout occurred: {e}") from e 245 | except Exception as e: 246 | raise e 247 | 248 | 249 | def _run_cypher_query( 250 | cypher_driver: CypherDriver, 251 | query: str, 252 | params: Optional[Dict[str, Any]] = None, 253 | timeout_ms: int = 300000, 254 | ) -> List[Dict[str, Any]]: 255 | return _run_with_threading_timeout( 256 | _cypher_query_fn, timeout_ms, cypher_driver, query, params, timeout_ms / 1e3 257 | ) 258 | 259 | 260 | def _run_gremlin_query( 261 | gremlin_client: GremlinClient.Client, 262 | query: str, 263 | timeout_ms: int = 300000, 264 | ) -> List[Dict[str, Any]]: 265 | return _run_with_threading_timeout( 266 | _gremlin_query_fn, timeout_ms, gremlin_client, query 267 | ) 268 | -------------------------------------------------------------------------------- /apps/chatbot/mcp_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import asyncio 4 
| import json 5 | import logging 6 | from typing import Any, Dict, List, Optional, Sequence 7 | 8 | from mcp.server.models import InitializationOptions 9 | from mcp.server import NotificationOptions, Server 10 | from mcp.server.stdio import stdio_server 11 | from mcp.types import ( 12 | Resource, 13 | Tool, 14 | TextContent, 15 | ImageContent, 16 | EmbeddedResource, 17 | LoggingLevel 18 | ) 19 | import mcp.types as types 20 | from neo4j import GraphDatabase, Driver 21 | import requests 22 | 23 | logging.basicConfig(level=logging.INFO) 24 | logger = logging.getLogger("puppygraph-mcp") 25 | 26 | class PuppyGraphMCPServer: 27 | def __init__(self, 28 | bolt_uri: str = "bolt://localhost:7687", 29 | http_uri: str = "http://localhost:8081", 30 | username: str = "puppygraph", 31 | password: str = "puppygraph123"): 32 | self.bolt_uri = bolt_uri 33 | self.http_uri = http_uri 34 | self.username = username 35 | self.password = password 36 | self.driver: Optional[Driver] = None 37 | self.schema_cache: Optional[Dict[str, Any]] = None 38 | 39 | # Initialize Neo4j driver 40 | try: 41 | self.driver = GraphDatabase.driver(bolt_uri, auth=(username, password)) 42 | # Test connection 43 | with self.driver.session() as session: 44 | session.run("RETURN 1") 45 | logger.info(f"Connected to PuppyGraph at {bolt_uri}") 46 | except Exception as e: 47 | logger.error(f"Failed to connect to PuppyGraph: {e}") 48 | raise 49 | 50 | def close(self): 51 | if self.driver: 52 | self.driver.close() 53 | 54 | def get_schema(self) -> Dict[str, Any]: 55 | """Fetch schema from PuppyGraph HTTP API""" 56 | if self.schema_cache: 57 | return self.schema_cache 58 | 59 | try: 60 | response = requests.get( 61 | f"{self.http_uri}/schemajson", 62 | auth=(self.username, self.password), 63 | timeout=10 64 | ) 65 | response.raise_for_status() 66 | self.schema_cache = response.json() 67 | return self.schema_cache 68 | except Exception as e: 69 | logger.error(f"Failed to fetch schema: {e}") 70 | return {"vertices": [], 
"edges": []} 71 | 72 | def execute_cypher(self, query: str, params: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]: 73 | """Execute Cypher query against PuppyGraph""" 74 | if not self.driver: 75 | raise RuntimeError("Not connected to PuppyGraph") 76 | 77 | try: 78 | with self.driver.session() as session: 79 | result = session.run(query, params or {}) 80 | return [record.data() for record in result] 81 | except Exception as e: 82 | logger.error(f"Cypher query failed: {e}") 83 | raise 84 | 85 | server = Server("puppygraph-mcp") 86 | puppygraph = PuppyGraphMCPServer() 87 | 88 | @server.list_resources() 89 | async def handle_list_resources() -> list[Resource]: 90 | """List available resources""" 91 | return [ 92 | Resource( 93 | uri="puppygraph://schema", 94 | name="PuppyGraph Schema", 95 | description="Current graph schema with vertex and edge definitions", 96 | mimeType="application/json", 97 | ), 98 | Resource( 99 | uri="puppygraph://stats", 100 | name="Graph Statistics", 101 | description="Basic statistics about the graph (node/edge counts)", 102 | mimeType="application/json", 103 | ) 104 | ] 105 | 106 | @server.read_resource() 107 | async def handle_read_resource(uri: str) -> str: 108 | """Read a resource by URI""" 109 | if uri == "puppygraph://schema": 110 | schema = puppygraph.get_schema() 111 | return json.dumps(schema, indent=2) 112 | 113 | elif uri == "puppygraph://stats": 114 | try: 115 | stats = puppygraph.execute_cypher(""" 116 | MATCH (n) 117 | WITH count(n) as node_count 118 | MATCH ()-[r]->() 119 | WITH node_count, count(r) as edge_count 120 | RETURN node_count, edge_count 121 | """) 122 | return json.dumps(stats[0] if stats else {"node_count": 0, "edge_count": 0}, indent=2) 123 | except Exception as e: 124 | return json.dumps({"error": str(e)}, indent=2) 125 | 126 | else: 127 | raise ValueError(f"Unknown resource: {uri}") 128 | 129 | @server.list_tools() 130 | async def handle_list_tools() -> list[Tool]: 131 | """List available tools""" 132 | 
return [ 133 | Tool( 134 | name="execute_cypher", 135 | description="Execute a Cypher query against PuppyGraph and return results", 136 | inputSchema={ 137 | "type": "object", 138 | "properties": { 139 | "query": { 140 | "type": "string", 141 | "description": "The Cypher query to execute", 142 | }, 143 | "parameters": { 144 | "type": "object", 145 | "description": "Optional parameters for the query", 146 | "additionalProperties": True, 147 | } 148 | }, 149 | "required": ["query"], 150 | }, 151 | ), 152 | Tool( 153 | name="get_schema_info", 154 | description="Get detailed schema information about vertices and edges", 155 | inputSchema={ 156 | "type": "object", 157 | "properties": { 158 | "include_samples": { 159 | "type": "boolean", 160 | "description": "Whether to include sample data", 161 | "default": False 162 | } 163 | } 164 | } 165 | ), 166 | Tool( 167 | name="validate_cypher", 168 | description="Validate a Cypher query without executing it", 169 | inputSchema={ 170 | "type": "object", 171 | "properties": { 172 | "query": { 173 | "type": "string", 174 | "description": "The Cypher query to validate", 175 | } 176 | }, 177 | "required": ["query"], 178 | }, 179 | ) 180 | ] 181 | 182 | @server.call_tool() 183 | async def handle_call_tool(name: str, arguments: dict) -> list[types.TextContent]: 184 | """Handle tool calls""" 185 | 186 | if name == "execute_cypher": 187 | query = arguments.get("query") 188 | parameters = arguments.get("parameters", {}) 189 | 190 | if not query: 191 | return [types.TextContent(type="text", text="Error: No query provided")] 192 | 193 | try: 194 | results = puppygraph.execute_cypher(query, parameters) 195 | return [types.TextContent( 196 | type="text", 197 | text=json.dumps(results, indent=2, default=str) 198 | )] 199 | except Exception as e: 200 | return [types.TextContent( 201 | type="text", 202 | text=f"Error executing query: {str(e)}" 203 | )] 204 | 205 | elif name == "get_schema_info": 206 | include_samples = 
arguments.get("include_samples", False) 207 | 208 | try: 209 | schema = puppygraph.get_schema() 210 | 211 | if include_samples: 212 | # Get sample data for each vertex and edge type 213 | samples = {} 214 | for vertex in schema.get("vertices", []): 215 | label = vertex["label"] 216 | try: 217 | sample_query = f"MATCH (n:{label}) RETURN n LIMIT 3" 218 | samples[f"vertex_{label}"] = puppygraph.execute_cypher(sample_query) 219 | except: 220 | samples[f"vertex_{label}"] = [] 221 | 222 | for edge in schema.get("edges", []): 223 | label = edge["label"] 224 | try: 225 | sample_query = f"MATCH ()-[r:{label}]->() RETURN r LIMIT 3" 226 | samples[f"edge_{label}"] = puppygraph.execute_cypher(sample_query) 227 | except: 228 | samples[f"edge_{label}"] = [] 229 | 230 | result = {"schema": schema, "samples": samples} 231 | else: 232 | result = {"schema": schema} 233 | 234 | return [types.TextContent( 235 | type="text", 236 | text=json.dumps(result, indent=2, default=str) 237 | )] 238 | except Exception as e: 239 | return [types.TextContent( 240 | type="text", 241 | text=f"Error getting schema: {str(e)}" 242 | )] 243 | 244 | elif name == "validate_cypher": 245 | query = arguments.get("query") 246 | 247 | if not query: 248 | return [types.TextContent(type="text", text="Error: No query provided")] 249 | 250 | try: 251 | # Try to explain the query to validate syntax 252 | explain_query = f"EXPLAIN {query}" 253 | puppygraph.execute_cypher(explain_query) 254 | return [types.TextContent( 255 | type="text", 256 | text="Query syntax is valid" 257 | )] 258 | except Exception as e: 259 | return [types.TextContent( 260 | type="text", 261 | text=f"Query validation failed: {str(e)}" 262 | )] 263 | 264 | else: 265 | return [types.TextContent( 266 | type="text", 267 | text=f"Unknown tool: {name}" 268 | )] 269 | 270 | async def main(): 271 | # Register cleanup handler 272 | import atexit 273 | atexit.register(puppygraph.close) 274 | 275 | async with stdio_server() as (read_stream, write_stream): 
276 | await server.run( 277 | read_stream, 278 | write_stream, 279 | InitializationOptions( 280 | server_name="puppygraph-mcp", 281 | server_version="1.0.0", 282 | capabilities=server.get_capabilities( 283 | notification_options=NotificationOptions(), 284 | experimental_capabilities={}, 285 | ), 286 | ), 287 | ) 288 | 289 | if __name__ == "__main__": 290 | asyncio.run(main()) -------------------------------------------------------------------------------- /apps/imdb/run_agent.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | from functools import partial 5 | from typing import Iterable, List, Optional, Union 6 | 7 | import gradio as gr 8 | import yaml 9 | from langchain_community.tools.google_serper.tool import GoogleSerperRun 10 | from langchain_community.utilities import GoogleSerperAPIWrapper 11 | from langchain_core.messages import AIMessage, ToolMessage 12 | from langchain_core.prompts.chat import ChatPromptTemplate, MessagesPlaceholder 13 | from langchain_core.pydantic_v1 import Field, create_model 14 | from langchain_core.tools import StructuredTool 15 | from langchain_openai import ChatOpenAI 16 | 17 | from puppygraph import PuppyGraphClient, PuppyGraphHostConfig 18 | from puppygraph.rag import PuppyGraphAgent 19 | 20 | 21 | def _get_graph_schema_prompt(query_language: str) -> str: 22 | 23 | schema_prompt = """ 24 | Nodes are the following: 25 | - person: 26 | properties: 27 | - name: primaryName 28 | type: String 29 | description: The name of the person, as listed in the IMDb database. 30 | - name: birthYear 31 | type: Int 32 | description: The birth year of the person (if available). 33 | - name: deathYear 34 | type: Int 35 | description: The death year of the person (if available). 36 | 37 | - title: 38 | properties: 39 | - name: titleType 40 | type: String 41 | description: The type/format of the title (e.g., movie, short, tvseries, tvepisode, video, etc.). 
42 | - name: primaryTitle 43 | type: String 44 | description: The more popular title or the title used by filmmakers on promotional materials at the point of release. 45 | - name: originalTitle 46 | type: String 47 | description: The original title, in the original language. 48 | - name: isAdult 49 | type: Boolean 50 | description: Indicates whether the title is for adults (1: adult title, 0: non-adult title). 51 | - name: startYear 52 | type: Int 53 | description: Represents the release year of a title. For TV Series, this is the series start year. 54 | - name: endYear 55 | type: Int 56 | description: For TV Series, this is the series end year. '\\N' for all other title types. 57 | - name: runtimeMinutes 58 | type: Int 59 | description: The primary runtime of the title, in minutes. 60 | 61 | Edges are the following: 62 | - cast_and_crew: 63 | from: title 64 | to: person 65 | properties: 66 | - name: ordering 67 | type: Int 68 | description: A unique identifier for the row, used to determine the order of people associated with this title. 69 | - name: category 70 | type: String 71 | description: The category of job that the person was in (e.g., actor, director). 72 | - name: job 73 | type: String 74 | description: The specific job title if applicable, else '\\N'. 75 | - name: characters 76 | type: String 77 | description: The name of the character played if applicable, else '\\N'. 78 | """ 79 | if query_language == "cypher": 80 | additional_instructions = "" 81 | elif query_language == "gremlin": 82 | additional_instructions = """ 83 | The relationships are the following: 84 | g.V().hasLabel('title').out('cast_and_crew').hasLabel('person'), 85 | g.V().hasLabel('person').in('cast_and_crew').hasLabel('title'), 86 | 87 | if filter by category, you must use outE() or inE(), because the category is stored in the EDGE properties. 
88 | """ 89 | else: 90 | raise NotImplementedError(f"Query language {query_language} is not supported.") 91 | 92 | return schema_prompt + additional_instructions 93 | 94 | 95 | def _get_chat_prompt_template( 96 | graph_schema_prompt: str, search_tool_enabled: bool 97 | ) -> ChatPromptTemplate: 98 | 99 | if search_tool_enabled: 100 | additional_conclusion_prompt = ", please also cite the source [🌐] or [📈] indicating whether the information is from the internet or from the graph database if applicable" 101 | else: 102 | additional_conclusion_prompt = "" 103 | return ChatPromptTemplate.from_messages( 104 | [ 105 | ( 106 | "system", 107 | "You are a helpful assistant to help answer user questions about imdb." 108 | "You will need to use the information stored in the graph database to answer the user's questions." 109 | "Here is some information about the graph database schema.\n" 110 | f"{graph_schema_prompt}", 111 | ), 112 | ( 113 | "system", 114 | "You must first output a PLAN, then you can use the PLAN to call the tools.\n" 115 | "Each STEP of the PLAN should be corresponding to one or more function calls (but not less), either simple or complex.\n" 116 | "Minimize the number of steps in the PLAN, but make sure the PLAN is workable.\n" 117 | "Remember, each step can be converted to a Gremlin query, since Gremlin query can handle quite complex queries," 118 | "each step can be complex as well as long as it can be converted to a Gremlin query.", 119 | ), 120 | MessagesPlaceholder(variable_name="message_history"), 121 | ( 122 | "system", 123 | "Always use the JSON format {\n" 124 | "'THINKING': ," 125 | "'PLAN': ," 126 | f"'CONCLUSION': ", 127 | ), 128 | ], 129 | template_format="jinja2", 130 | ) 131 | 132 | 133 | def _get_llm() -> ChatOpenAI: 134 | return ChatOpenAI( 135 | model="gpt-4o-2024-08-06", 136 | temperature=0, 137 | api_key=os.getenv("OPENAI_API_KEY"), 138 | ) 139 | 140 | 141 | def _get_puppy_graph_client(ip) -> PuppyGraphClient: 142 | return 
PuppyGraphClient(PuppyGraphHostConfig(ip=ip)) 143 | 144 | 145 | def _display_ai_message_content( 146 | ai_message_content: str, is_last_message: bool 147 | ) -> Iterable[str]: 148 | if not is_last_message: 149 | conclusion_emoji = "📝" 150 | else: 151 | conclusion_emoji = "✅" 152 | 153 | try: 154 | text_dict = yaml.safe_load(ai_message_content) 155 | if "THINKING" in text_dict: 156 | yield f"📝 {text_dict['THINKING']}" 157 | 158 | if "PLAN" in text_dict: 159 | yield f"📝 {text_dict['PLAN']}" 160 | if "CONCLUSION" in text_dict: 161 | yield f"{conclusion_emoji} {text_dict['CONCLUSION']}" 162 | except Exception as _: 163 | text_split = ai_message_content.split("'CONCLUSION':") 164 | seps = "\n} " 165 | yield f"{conclusion_emoji} {text_split[-1].strip(seps)}" 166 | 167 | 168 | def _display_ai_message_tool_calls(tool_calls: List[str]) -> Iterable[str]: 169 | for tool_call in tool_calls: 170 | yield f"🔨 Calling {tool_call['name']} with args: {tool_call['args']}" 171 | 172 | 173 | def _display_tool_message(tool_message: ToolMessage) -> Iterable[str]: 174 | yield f"🔨 Response: {tool_message.content}" 175 | 176 | 177 | def _display_message( 178 | message: Optional[Union[AIMessage, ToolMessage]], is_last_message: bool = False 179 | ) -> Iterable[str]: 180 | if message is None: 181 | return 182 | 183 | if isinstance(message, AIMessage): 184 | yield from _display_ai_message_content(message.content, is_last_message) 185 | yield from _display_ai_message_tool_calls(message.tool_calls) 186 | elif isinstance(message, ToolMessage): 187 | yield from _display_tool_message(message) 188 | 189 | 190 | def _get_displayable_responses( 191 | pg_agent: PuppyGraphAgent, user_message: str 192 | ) -> Iterable[str]: 193 | response_iter = pg_agent.query(user_input=user_message, max_iters=20) 194 | previous_message = None 195 | while True: 196 | try: 197 | current_message = next(response_iter) 198 | for display_string in _display_message(previous_message): 199 | yield display_string 200 | 
def main():
    """Parse CLI options, assemble the PuppyGraphAgent, and launch the UI."""
    # Command-line configuration for host, query language and feature flags.
    parser = argparse.ArgumentParser(description="Configure PuppyGraphAgent settings.")
    parser.add_argument(
        "--ip",
        type=str,
        default="localhost",
        help="The IP address for the PuppyGraph.",
    )
    parser.add_argument(
        "--query_language",
        type=str,
        default="gremlin",
        help="The query language to be used (choose from 'gremlin' or 'cypher').",
    )
    parser.add_argument("--verbose", action="store_true", help="Enable verbose mode.")
    parser.add_argument(
        "--search", action="store_true", help="Enable search tool through internet."
    )
    args = parser.parse_args()

    # The internet-search tool is optional; without it the agent only has its
    # built-in graph-query tooling.
    if args.search:
        extra_tools = [
            StructuredTool.from_function(
                func=GoogleSerperAPIWrapper().run,
                name="google_serper",
                description="Query the internet.",
                args_schema=create_model("", query=(str, Field(description="query"))),
            )
        ]
    else:
        extra_tools = None

    # Wire the client, LLM and prompt template into the agent.
    agent = PuppyGraphAgent(
        puppy_graph_client=_get_puppy_graph_client(ip=args.ip),
        llm=_get_llm(),
        chat_prompt_template=_get_chat_prompt_template(
            graph_schema_prompt=_get_graph_schema_prompt(
                query_language=args.query_language
            ),
            search_tool_enabled=args.search,
        ),
        query_language=args.query_language,
        additional_tools=extra_tools,
    )

    # Serve the Gradio chat interface (blocking).
    _get_gradio_chatbot(pg_agent=agent, verbose_mode=args.verbose).launch()


if __name__ == "__main__":
    main()
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
"""Register the mining-site graph schema with a local PuppyGraph server.

Maps Databricks (Unity Catalog / Delta Lake) tables onto graph vertices
(failure_type, asset, work_order) and edges (can_have_failure, worked_on,
related_to_failure).  Credentials are supplied via ``${ENV:...}``
placeholders that PuppyGraph resolves from the environment at load time.
"""

import logging

from puppygraph import PuppyGraphClient, PuppyGraphHostConfig

logging.basicConfig(level=logging.INFO)


if __name__ == "__main__":
    # Fix: variable was misspelled "cilent".
    client = PuppyGraphClient(PuppyGraphHostConfig(ip="127.0.0.1"))
    client.set_schema(
        {
            "catalogs": [
                {
                    "name": "pg_databricks",
                    "type": "DELTALAKE",
                    "params": {
                        "metastore_param": {
                            "token": "${ENV:DATABRICKS_TOKEN}",
                            "host": "${ENV:DATABRICKS_HOST}",
                            "unity_catalog_name": "pg_databricks",
                        },
                        "storage_param": {
                            "use_instance_profile": "false",
                            "region": "us-east-1",
                            "access_key": "${ENV:AWS_ACCESS_KEY_ID}",
                            "secret_key": "${ENV:AWS_SECRET_ACCESS_KEY}",
                            "enable_ssl": "false",
                            "type": "S3",
                        },
                    },
                }
            ],
            "vertices": [
                {
                    "table_source": {
                        "catalog_name": "pg_databricks",
                        "schema_name": "bronze",
                        "table_name": "failure_type",
                    },
                    "label": "failure_type",
                    "description": "A type of failure",
                    "attributes": [
                        {
                            "name": "failure_type_name",
                            "from_field": "failure_type_name",
                            "type": "String",
                            "description": "The name of the failure type",
                        }
                    ],
                    "id": [
                        {
                            "name": "failure_type_id",
                            "from_field": "failure_type_id",
                            "type": "String",
                        }
                    ],
                },
                {
                    "table_source": {
                        "catalog_name": "pg_databricks",
                        "schema_name": "silver",
                        "table_name": "assets",
                    },
                    "label": "asset",
                    "description": "An asset in the system",
                    "attributes": [
                        {
                            "name": "asset_id",
                            "from_field": "asset_id",
                            "type": "String",
                            "description": "The ID of the asset",
                        },
                        {
                            "name": "asset_name",
                            "from_field": "asset_name",
                            "type": "String",
                            "description": "The name of the asset",
                        },
                        {
                            "name": "asset_type",
                            "from_field": "asset_type",
                            "type": "String",
                            "description": "The type of the asset",
                        },
                        {
                            "name": "location",
                            "from_field": "location",
                            "type": "String",
                            "description": "The location of the asset",
                        },
                        {
                            "name": "acquisition_date",
                            "from_field": "acquisition_date_formatted",
                            "type": "Date",
                            "description": "The acquisition date of the asset",
                        },
                        {
                            "name": "status",
                            "from_field": "status",
                            "type": "String",
                            "description": "The status of the asset",
                        },
                    ],
                    "id": [{"name": "id", "from_field": "asset_id", "type": "String"}],
                },
                {
                    "table_source": {
                        "catalog_name": "pg_databricks",
                        "schema_name": "gold",
                        "table_name": "work_orders",
                    },
                    "label": "work_order",
                    "description": "A work order in the system",
                    "attributes": [
                        {
                            "name": "work_order_id",
                            "from_field": "work_order_id",
                            "type": "String",
                            "description": "The ID of the work order",
                        },
                        {
                            "name": "date",
                            "from_field": "date",
                            "type": "Date",
                            "description": "The date of the work order",
                        },
                        {
                            "name": "action_taken",
                            "from_field": "action_taken",
                            "type": "String",
                            "description": "The action taken for the work order",
                        },
                        {
                            "name": "technician",
                            "from_field": "technician",
                            "type": "String",
                            "description": "The technician handling the work order",
                        },
                        {
                            "name": "component_replaced_description",
                            "from_field": "component_replaced_description",
                            "type": "String",
                            "description": "Description of the component replaced",
                        },
                        {
                            "name": "component_replaced_material_num",
                            "from_field": "component_replaced_material_num",
                            "type": "String",
                            "description": "Material number of the component replaced",
                        },
                        {
                            "name": "repeated_work_order",
                            "from_field": "repeated_work_order",
                            "type": "Boolean",
                            "description": "Whether the work order is a repeated one",
                        },
                        {
                            "name": "successful_fix",
                            "from_field": "successful_fix",
                            "type": "Boolean",
                            "description": "Whether the issue was successfully fixed",
                        },
                    ],
                    "id": [
                        {"name": "id", "from_field": "work_order_id", "type": "String"}
                    ],
                },
            ],
            "edges": [
                {
                    "table_source": {
                        "catalog_name": "pg_databricks",
                        "schema_name": "silver",
                        "table_name": "troubleshooting_guide",
                    },
                    "label": "can_have_failure",
                    "from_label": "asset",
                    "to_label": "failure_type",
                    "description": "An asset can have a failure type",
                    "attributes": [
                        {
                            "name": "steps_to_follow",
                            "from_field": "steps_to_follow",
                            "type": "String",
                            "description": "Steps to follow for the failure",
                        },
                        {
                            "name": "reference_source",
                            "from_field": "reference_source",
                            "type": "String",
                            "description": "The reference source for the failure",
                        },
                        {
                            "name": "recommended_actions",
                            "from_field": "recommended_actions",
                            "type": "String",
                            "description": "The recommended actions for the failure",
                        },
                    ],
                    "id": [
                        {
                            "name": "can_have_failure_id",
                            "from_field": "reference_id",
                            "type": "String",
                        }
                    ],
                    "from_id": [
                        {"name": "asset_id", "from_field": "asset_id", "type": "String"}
                    ],
                    "to_id": [
                        {
                            "name": "failure_type_id",
                            "from_field": "failure_type",
                            "type": "String",
                        }
                    ],
                },
                {
                    "table_source": {
                        "catalog_name": "pg_databricks",
                        "schema_name": "gold",
                        "table_name": "work_orders",
                    },
                    "label": "worked_on",
                    "from_label": "work_order",
                    "to_label": "asset",
                    "description": "A work order worked on an asset",
                    "attributes": [],
                    "id": [
                        {
                            "name": "worked_on_id",
                            "from_field": "work_order_id",
                            "type": "String",
                        }
                    ],
                    "from_id": [
                        {
                            "name": "work_order_id",
                            "from_field": "work_order_id",
                            "type": "String",
                        }
                    ],
                    "to_id": [
                        {"name": "asset_id", "from_field": "asset_id", "type": "String"}
                    ],
                },
                {
                    "table_source": {
                        "catalog_name": "pg_databricks",
                        "schema_name": "gold",
                        "table_name": "work_orders",
                    },
                    "label": "related_to_failure",
                    "from_label": "work_order",
                    "to_label": "failure_type",
                    "description": "A work order is related to a failure type",
                    "attributes": [],
                    "id": [
                        {
                            "name": "related_to_failure_id",
                            "from_field": "work_order_id",
                            "type": "String",
                        }
                    ],
                    "from_id": [
                        {
                            "name": "work_order_id",
                            "from_field": "work_order_id",
                            "type": "String",
                        }
                    ],
                    "to_id": [
                        {
                            "name": "failure_type_id",
                            "from_field": "llm_failure_type",
                            "type": "String",
                        }
                    ],
                },
            ],
        }
    )
| "from_field": "location_long", 72 | }, 73 | ], 74 | }, 75 | ], 76 | "edges": [ 77 | { 78 | "label": "LivesIn", 79 | "from_label": "Person", 80 | "to_label": "Location", 81 | "table_source": { 82 | "catalog_name": "my_catalog", 83 | "schema_name": "my_schema", 84 | "table_name": "lives_in_table", 85 | }, 86 | "id": [{"name": "id", "type": "String", "from_field": "id"}], 87 | "from_id": [ 88 | { 89 | "name": "from_id", 90 | "type": "String", 91 | "from_field": "from_id", 92 | } 93 | ], 94 | "to_id": [ 95 | {"name": "to_id", "type": "String", "from_field": "to_id"} 96 | ], 97 | "attributes": [ 98 | { 99 | "name": "since", 100 | "type": "Date", 101 | "from_field": "since_date", 102 | } 103 | ], 104 | }, 105 | { 106 | "label": "Likes", 107 | "from_label": "Person", 108 | "to_label": "Person", 109 | "table_source": { 110 | "catalog_name": "my_catalog", 111 | "schema_name": "my_schema", 112 | "table_name": "likes_table", 113 | }, 114 | "id": [{"name": "id", "type": "String", "from_field": "id"}], 115 | "from_id": [ 116 | { 117 | "name": "from_id", 118 | "type": "String", 119 | "from_field": "from_id", 120 | } 121 | ], 122 | "to_id": [ 123 | {"name": "to_id", "type": "String", "from_field": "to_id"} 124 | ], 125 | "attributes": [], 126 | }, 127 | ], 128 | }, 129 | { 130 | "catalogs": [ 131 | { 132 | "name": "my_catalog", 133 | "type": "deltalake", 134 | "metastore": { 135 | "type": "unity", 136 | "token": "my_token", 137 | "host": "my_host", 138 | "databricksCatalogName": "my_catalog_name", 139 | }, 140 | "storage": { 141 | "useInstanceProfile": "false", 142 | "region": "my_region", 143 | "accessKey": "my_access_key", 144 | "secretKey": "my_secret_key", 145 | "enableSsl": "true", 146 | "type": "S3", 147 | }, 148 | }, 149 | ], 150 | "graph": { 151 | "vertices": [ 152 | { 153 | "label": "Person", 154 | "oneToOne": { 155 | "tableSource": { 156 | "catalog": "my_catalog", 157 | "schema": "my_schema", 158 | "table": "person_table", 159 | }, 160 | "id": { 161 | "fields": [ 162 
| {"field": "id", "type": "String", "alias": "id"} 163 | ] 164 | }, 165 | "attributes": [ 166 | { 167 | "alias": "name", 168 | "field": "person_name", 169 | "type": "String", 170 | }, 171 | { 172 | "alias": "age", 173 | "field": "person_age", 174 | "type": "Int", 175 | }, 176 | ], 177 | }, 178 | }, 179 | { 180 | "label": "Location", 181 | "oneToOne": { 182 | "tableSource": { 183 | "catalog": "my_catalog", 184 | "schema": "my_schema", 185 | "table": "location_table", 186 | }, 187 | "id": { 188 | "fields": [ 189 | {"field": "id", "type": "String", "alias": "id"} 190 | ] 191 | }, 192 | "attributes": [ 193 | { 194 | "alias": "name", 195 | "field": "location_name", 196 | "type": "String", 197 | }, 198 | { 199 | "alias": "latitude", 200 | "field": "location_lat", 201 | "type": "Float", 202 | }, 203 | { 204 | "alias": "longitude", 205 | "field": "location_long", 206 | "type": "Float", 207 | }, 208 | ], 209 | }, 210 | }, 211 | ], 212 | "edges": [ 213 | { 214 | "label": "LivesIn", 215 | "fromVertex": "Person", 216 | "toVertex": "Location", 217 | "tableSource": { 218 | "catalog": "my_catalog", 219 | "schema": "my_schema", 220 | "table": "lives_in_table", 221 | }, 222 | "id": { 223 | "fields": [ 224 | {"field": "id", "type": "String", "alias": "id"} 225 | ] 226 | }, 227 | "fromId": { 228 | "fields": [ 229 | { 230 | "field": "from_id", 231 | "type": "String", 232 | "alias": "from_id", 233 | } 234 | ] 235 | }, 236 | "toId": { 237 | "fields": [ 238 | { 239 | "field": "to_id", 240 | "type": "String", 241 | "alias": "to_id", 242 | } 243 | ] 244 | }, 245 | "attributes": [ 246 | { 247 | "alias": "since", 248 | "field": "since_date", 249 | "type": "Date", 250 | } 251 | ], 252 | }, 253 | { 254 | "label": "Likes", 255 | "fromVertex": "Person", 256 | "toVertex": "Person", 257 | "tableSource": { 258 | "catalog": "my_catalog", 259 | "schema": "my_schema", 260 | "table": "likes_table", 261 | }, 262 | "id": { 263 | "fields": [ 264 | {"field": "id", "type": "String", "alias": "id"} 265 | ] 
@dataclass
class QueryExample:
    """Example query pairing a natural-language question with its Cypher.

    Instances seed the vector store and are retrieved as few-shot context
    when generating new queries.
    """
    # Natural-language question; this is the text that gets embedded.
    question: str
    # The Cypher query that answers the question.
    cypher: str
    # Short human-readable explanation of what the query does.
    description: str
    # Optional schema snippet the example assumes (stored as metadata).
    schema_context: Optional[str] = None
35 | - Use proper node and relationship patterns: (n)-[r]->(m) 36 | - Always list required properties in the query, do not return nodes or relationships without the required properties 37 | - Only return one aggregate value per query, including collect(), count(), size(), type(), etc.""" 38 | 39 | output_format_instruction: str = """OUTPUT FORMAT: 40 | Use the generate_cypher_query tool to create a Cypher query that answers this question. 41 | Provide: 42 | 1. A complete, valid Cypher query 43 | 2. A clear explanation of what the query does 44 | 3. Step-by-step reasoning (optional but helpful)""" 45 | 46 | 47 | @dataclass 48 | class QueryStep: 49 | """A single step in a multi-query execution plan""" 50 | step_number: int 51 | description: str 52 | cypher: str 53 | result: Optional[List[Dict[str, Any]]] = None 54 | error: Optional[str] = None 55 | prompt: Optional[str] = None 56 | llm_response: Optional[str] = None 57 | 58 | 59 | class TextToCypherRAG: 60 | """RAG system for converting natural language to Cypher queries""" 61 | 62 | def __init__(self, 63 | embedding_model: str = "all-MiniLM-L6-v2", 64 | collection_name: str = "cypher_examples", 65 | anthropic_api_key: Optional[str] = None, 66 | prompt_config: Optional[PromptConfig] = None): 67 | 68 | # Initialize embedding model 69 | self.embedding_model = SentenceTransformer(embedding_model) 70 | 71 | # Initialize ChromaDB 72 | self.chroma_client = chromadb.Client() 73 | self.collection = self.chroma_client.get_or_create_collection( 74 | name=collection_name, 75 | metadata={"hnsw:space": "cosine"} 76 | ) 77 | 78 | # Initialize Anthropic client 79 | self.anthropic_client = Anthropic(api_key=anthropic_api_key or os.getenv("ANTHROPIC_API_KEY")) 80 | 81 | # Initialize prompt configuration 82 | self.prompt_config = prompt_config or PromptConfig() 83 | 84 | # Initialize conversation history - maintain context across questions 85 | self.conversation_messages = [] 86 | self.current_schema = None 87 | 88 | # Initialize with 
some default examples 89 | self._initialize_examples() 90 | 91 | def _initialize_examples(self): 92 | """Initialize the RAG system with some common query examples""" 93 | examples = [ 94 | QueryExample( 95 | question="Show all nodes in the graph", 96 | cypher="MATCH (n) RETURN n LIMIT 100", 97 | description="Returns all nodes with a limit" 98 | ), 99 | QueryExample( 100 | question="Count all nodes", 101 | cypher="MATCH (n) RETURN count(n) as node_count", 102 | description="Counts total number of nodes" 103 | ), 104 | QueryExample( 105 | question="Count all relationships", 106 | cypher="MATCH ()-[r]->() RETURN count(r) as relationship_count", 107 | description="Counts total number of relationships" 108 | ), 109 | QueryExample( 110 | question="Show graph statistics", 111 | cypher="MATCH (n) WITH count(n) as node_count MATCH ()-[r]->() RETURN node_count, count(r) as edge_count", 112 | description="Shows basic graph statistics" 113 | ), 114 | QueryExample( 115 | question="Find nodes with specific property", 116 | cypher="MATCH (n) WHERE n.name IS NOT NULL RETURN n.name LIMIT 10", 117 | description="Finds nodes that have a name property" 118 | ), 119 | QueryExample( 120 | question="Show all relationship types", 121 | cypher="MATCH ()-[r]->() RETURN DISTINCT type(r) as relationship_type", 122 | description="Returns all unique relationship types in the graph" 123 | ), 124 | QueryExample( 125 | question="Show all node labels", 126 | cypher="MATCH (n) RETURN DISTINCT labels(n) as node_labels", 127 | description="Returns all unique node labels in the graph" 128 | ), 129 | QueryExample( 130 | question="Find connected nodes", 131 | cypher="MATCH (n)-[r]-(m) RETURN n, type(r) as relationship, m LIMIT 20", 132 | description="Shows connected nodes with their relationships" 133 | ), 134 | QueryExample( 135 | question="Find shortest path between nodes", 136 | cypher="MATCH p = shortestPath((start)-[*]-(end)) WHERE id(start) = $start_id AND id(end) = $end_id RETURN p", 137 | 
description="Finds shortest path between two specific nodes" 138 | ), 139 | QueryExample( 140 | question="Find nodes by degree", 141 | cypher="MATCH (n) WITH n, size((n)--()) as degree WHERE degree > 5 RETURN n, degree ORDER BY degree DESC", 142 | description="Finds nodes with high connectivity (degree > 5)" 143 | ) 144 | ] 145 | 146 | self._add_examples_to_collection(examples) 147 | 148 | def _add_examples_to_collection(self, examples: List[QueryExample]): 149 | """Add examples to the ChromaDB collection""" 150 | if not examples: 151 | return 152 | 153 | # Check if examples already exist 154 | existing_count = self.collection.count() 155 | if existing_count >= len(examples): 156 | logger.info(f"Examples already exist in collection ({existing_count} items)") 157 | return 158 | 159 | # Prepare data for ChromaDB 160 | questions = [ex.question for ex in examples] 161 | embeddings = self.embedding_model.encode(questions).tolist() 162 | 163 | ids = [f"example_{i}" for i in range(len(examples))] 164 | documents = questions 165 | metadatas = [ 166 | { 167 | "cypher": ex.cypher, 168 | "description": ex.description, 169 | "schema_context": ex.schema_context or "" 170 | } 171 | for ex in examples 172 | ] 173 | 174 | # Add to collection 175 | self.collection.add( 176 | ids=ids, 177 | embeddings=embeddings, 178 | documents=documents, 179 | metadatas=metadatas 180 | ) 181 | 182 | logger.info(f"Added {len(examples)} examples to the collection") 183 | 184 | def add_example(self, example: QueryExample): 185 | """Add a single example to the RAG system""" 186 | self._add_examples_to_collection([example]) 187 | 188 | def update_prompt_config(self, prompt_config: PromptConfig): 189 | """Update the prompt configuration""" 190 | self.prompt_config = prompt_config 191 | logger.info("Prompt configuration updated") 192 | 193 | def get_prompt_config(self) -> PromptConfig: 194 | """Get the current prompt configuration""" 195 | return self.prompt_config 196 | 197 | def 
def _build_system_prompt(self, schema: Dict[str, Any]) -> str:
    """Build the system prompt with static information that doesn't change during conversation"""

    # Format schema information
    schema_info = self._format_schema_for_prompt(schema)

    # Get similar examples for general context
    # (top-5 nearest neighbors to a generic probe query, not the user's question)
    examples_text = "\n".join([
        f"Q: {ex['question']}\nCypher: {ex['cypher']}\nDescription: {ex['description']}\n"
        for ex in self.find_similar_examples("graph query examples", k=5)
    ])

    system_prompt = f"""{self.prompt_config.role_definition}

{self.prompt_config.plan_generation_instruction}

GRAPH SCHEMA:
{schema_info}

EXAMPLE QUERY PATTERNS:
{examples_text}

{self.prompt_config.puppygraph_differences}

RULES:
1. Always use proper Cypher syntax
2. Include appropriate LIMIT clauses for large result sets
3. Use parameterized queries when possible
4. Consider the graph schema when writing queries
5. Return only valid, executable Cypher
6. Be conservative with result sizes (use LIMIT 100 or less by default)
7. IMPORTANT: You have a limited number of query rounds. When approaching the limit, prioritize gathering the most essential information.
8. CRITICAL: When you reach the maximum number of rounds, you MUST stop and provide a comprehensive summary based on all gathered data.

{self.prompt_config.output_format_instruction}

You will be asked to help answer questions by generating Cypher queries step by step. Use the tools provided to generate queries or make decisions about when to stop and provide a final answer. Consider previous conversation context when making decisions."""

    return system_prompt


def _update_schema_if_needed(self, schema: Dict[str, Any]):
    """Update the schema and rebuild system prompt if schema has changed"""
    if self.current_schema != schema:
        self.current_schema = schema
        # Only reset conversation if this is truly a new schema (not just the first time)
        if hasattr(self, 'conversation_messages') and self.conversation_messages:
            logger.warning("Schema changed mid-conversation, resetting conversation history")
            self.conversation_messages = []
        logger.info("Schema updated")


def clear_conversation(self):
    """Clear the conversation history (but keep the current schema/system prompt)"""
    self.conversation_messages = []


def get_system_prompt(self) -> str:
    """Get the current system prompt"""
    # NOTE: rebuilt on every call (includes an embedding lookup via
    # find_similar_examples); returns "" until a schema has been set.
    if self.current_schema:
        return self._build_system_prompt(self.current_schema)
    return ""


def find_similar_examples(self, question: str, k: int = 3) -> List[Dict[str, Any]]:
    """Find similar examples for a given question"""
    # Generate embedding for the question
    question_embedding = self.embedding_model.encode([question]).tolist()[0]

    # Search in ChromaDB
    results = self.collection.query(
        query_embeddings=[question_embedding],
        n_results=k,
        include=["documents", "metadatas", "distances"]
    )

    # Format results
    similar_examples = []
    if results["ids"]:
        for i in range(len(results["ids"][0])):
            similar_examples.append({
                "question": results["documents"][0][i],
                "cypher": results["metadatas"][0][i]["cypher"],
                "description": results["metadatas"][0][i]["description"],
                "similarity": 1 - results["distances"][0][i]  # Convert distance to similarity
            })

    return similar_examples
Dict[str, Any]) -> str: 283 | """Format schema information for the LLM prompt""" 284 | schema_text = "VERTICES:\n" 285 | 286 | for vertex in schema.get("vertices", []): 287 | label = vertex.get("label", "Unknown") 288 | attributes = vertex.get("attributes", []) 289 | description = vertex.get("description", "").strip() 290 | 291 | # Label on its own line 292 | schema_text += f"- {label}\n" 293 | 294 | # Description section (if available) 295 | if description: 296 | schema_text += f" Description: {description}\n" 297 | 298 | # Attributes 299 | if attributes: 300 | attr_text = ", ".join([f"{attr['name']}:{attr['type']}" for attr in attributes]) 301 | schema_text += f" Attributes: {attr_text}\n" 302 | else: 303 | schema_text += f" Attributes: (none)\n" 304 | 305 | schema_text += "\n" 306 | 307 | schema_text += "EDGES:\n" 308 | for edge in schema.get("edges", []): 309 | label = edge.get("label", "Unknown") 310 | from_vertex = edge.get("from", "Unknown") 311 | to_vertex = edge.get("to", "Unknown") 312 | attributes = edge.get("attributes", []) 313 | description = edge.get("description", "").strip() 314 | 315 | # Edge pattern on its own line 316 | schema_text += f"- (:{from_vertex})-[:{label}]->(:{to_vertex})\n" 317 | 318 | # Description section (if available) 319 | if description: 320 | schema_text += f" Description: {description}\n" 321 | 322 | # Attributes 323 | if attributes: 324 | attr_text = ", ".join([f"{attr['name']}:{attr['type']}" for attr in attributes]) 325 | schema_text += f" Attributes: {attr_text}\n" 326 | else: 327 | schema_text += f" Attributes: (none)\n" 328 | 329 | schema_text += "\n" 330 | 331 | return schema_text 332 | 333 | def generate_next_query(self, 334 | question: str, 335 | schema: Dict[str, Any], 336 | previous_steps: List[QueryStep], 337 | max_rounds: int = 5) -> Tuple[str, str, bool, str, str]: 338 | """Generate the next query in a multi-round execution, or decide to stop""" 339 | 340 | # Ensure we have the schema set up in our conversation 
def generate_next_query(self,
                        question: str,
                        schema: Dict[str, Any],
                        previous_steps: List[QueryStep],
                        max_rounds: int = 5) -> Tuple[str, str, bool, str, str]:
    """Generate the next query in a multi-round execution, or decide to stop.

    Returns a 5-tuple: (cypher_query, explanation_or_final_answer,
    should_stop, display_prompt, raw_llm_response).  When should_stop is
    True the cypher_query is "" and the second element is the final answer.
    """

    # Ensure we have the schema set up in our conversation
    self._update_schema_if_needed(schema)

    # If this is the start of a new question, add it to conversation
    if not previous_steps:
        self.conversation_messages.append({
            "role": "user",
            "content": f"Please help me answer this question: {question}"
        })

    # Debug: Print conversation history at each round
    logger.info(f"=== Round {len(previous_steps) + 1} - Conversation History ({len(self.conversation_messages)} messages) ===")
    for i, msg in enumerate(self.conversation_messages):
        logger.info(f"  {i+1}. {msg['role']}: {msg['content'][:100]}...")
    logger.info("=== End Conversation History ===")

    # Define the tool for multi-round query generation
    multi_round_tool = {
        "name": "multi_round_query_decision",
        "description": "Decide whether to generate another query or provide final answer in multi-round execution",
        "input_schema": {
            "type": "object",
            "properties": {
                "action": {
                    "type": "string",
                    "enum": ["CONTINUE", "STOP"],
                    "description": "Whether to continue with another query or stop and provide final answer. You MUST choose STOP if you're at the maximum number of rounds."
                },
                "cypher_query": {
                    "type": "string",
                    "description": "The next Cypher query to execute (only if action is CONTINUE)"
                },
                "explanation": {
                    "type": "string",
                    "description": "Explanation of what this query will do (if CONTINUE) or the final answer to the original question (if STOP)"
                },
                "final_answer": {
                    "type": "string",
                    "description": "The final answer to the original question (if STOP). This should be a comprehensive summary based on all gathered data. Do not include query details or reasoning - focus on answering the user's original question directly."
                },
                "reasoning": {
                    "type": "string",
                    "description": "Reasoning for why you chose to continue or stop"
                }
            },
            "required": ["action", "explanation"]
        }
    }

    # Add the current request to ask what to do next.
    # The wording escalates as we approach max_rounds so the model stops in time.
    current_round = len(previous_steps) + 1
    remaining_rounds = max_rounds - len(previous_steps)

    if previous_steps:
        if current_round >= max_rounds:
            # Force summarization on last round
            current_request = f"I have executed {len(previous_steps)} queries and this is my final round (round {max_rounds}). I must now STOP and provide a comprehensive final answer to the original question '{question}' based on all the information I've gathered."
        elif current_round == max_rounds - 1:
            # Warn about approaching limit
            current_request = f"I have executed {len(previous_steps)} queries so far and only have {remaining_rounds} round left. Based on the results from our conversation, should I continue with one final query or do I have enough information to answer the original question '{question}'? If I continue, the next round will be my last."
        else:
            current_request = f"I have executed {len(previous_steps)} queries so far ({remaining_rounds} rounds remaining). Based on the results from our conversation, should I continue with another query or do I have enough information to answer the original question '{question}'?"
    else:
        current_request = f"I need to answer the question: '{question}'. Should I execute a query to gather information or do I already have what I need? I have up to {max_rounds} rounds available."

    # Create a copy of messages for this request (don't permanently add the request)
    current_messages = self.conversation_messages + [{"role": "user", "content": current_request}]

    # Build simple prompt that will be shown in UI (the real context is in conversation history)
    display_prompt = f"Question: {question}\nCurrent request: {current_request}"

    try:
        # Forced tool choice: the model must answer via the decision tool.
        response = self.anthropic_client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=2000,
            temperature=0,
            system=self.get_system_prompt(),
            tools=[multi_round_tool],
            tool_choice={"type": "tool", "name": "multi_round_query_decision"},
            messages=current_messages
        )

        # Extract tool use result
        if response.content and response.content[0].type == "tool_use":
            tool_input = response.content[0].input
            action = tool_input.get("action", "STOP")
            explanation = tool_input.get("explanation", "")
            reasoning = tool_input.get("reasoning", "")

            # Format the raw response for display
            raw_response = f"Multi-round tool response:\n{json.dumps(tool_input, indent=2)}"

            if action == "STOP" or current_round >= max_rounds:
                # Force stop if we've reached max rounds, regardless of LLM decision
                if current_round >= max_rounds and action == "CONTINUE":
                    logger.warning(f"LLM tried to continue past max_rounds ({max_rounds}), forcing stop")
                    action = "STOP"
                    explanation = "Reached maximum number of query rounds - providing summary based on gathered data."

                # Include reasoning in the final answer if provided
                final_answer = tool_input.get("final_answer", "")
                if not final_answer:
                    final_answer = explanation
                    if reasoning:
                        final_answer = f"{explanation}\n\nReasoning: {reasoning}"

                # Add the final response to conversation history
                self.conversation_messages.append({
                    "role": "assistant",
                    "content": f"I have enough information to answer. Final answer: {final_answer}"
                })

                return "", final_answer, True, display_prompt, raw_response
            else:
                # Continue with next query
                cypher_query = tool_input.get("cypher_query", "")
                if not cypher_query:
                    # Defensive fallback: harmless count query when the model
                    # chose CONTINUE but supplied no query.
                    logger.warning("No cypher query provided for CONTINUE action")
                    return "MATCH (n) RETURN count(n) as count", "No query provided", False, display_prompt, raw_response

                # Include reasoning in explanation if provided
                full_explanation = explanation
                if reasoning:
                    full_explanation = f"{explanation} (Reasoning: {reasoning})"

                # Add the decision to conversation history so Claude can see its own reasoning
                self.conversation_messages.append({
                    "role": "assistant",
                    "content": f"I decided to continue with another query: {cypher_query}\nExplanation: {full_explanation}"
                })

                return cypher_query, full_explanation, False, display_prompt, raw_response
        else:
            logger.warning("No tool use found in multi-round response")
            return "MATCH (n) RETURN count(n) as count", "Error: No valid tool response received", True, display_prompt, "No tool use found"

    except Exception as e:
        logger.error(f"Error generating next query: {e}")
        return "MATCH (n) RETURN count(n) as count", f"Error: {str(e)}", True, "", f"Error: {str(e)}"
def generate_final_answer_from_steps(self, question: str, executed_steps: List["QueryStep"]) -> str:
    """Ask the LLM to summarize the conversation history into a final answer.

    Returns a synthesized answer based on every executed query round, or a
    fallback message when no queries ran, the model reply is unusable, or
    the API call fails.
    """
    if not executed_steps:
        return "I wasn't able to execute any queries to answer your question."

    # Ask Claude to summarize based on the conversation history
    summary_request = f"Based on our conversation and the {len(executed_steps)} queries I've executed, please provide a comprehensive answer to the original question: '{question}'. Summarize what we learned and provide the best answer you can based on the data we gathered."
    history = self.conversation_messages + [{"role": "user", "content": summary_request}]

    try:
        # Extended-thinking request; temperature must be 1 when thinking is on.
        reply = self.anthropic_client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=2000,
            temperature=1,
            thinking={
                "type": "enabled",
                "budget_tokens": 1600
            },
            system=self.get_system_prompt(),
            messages=history
        )

        first = reply.content[0] if reply.content else None
        if first is not None and first.type == "text":
            return first.text
        # Fallback to basic summary
        return f"I executed {len(executed_steps)} queries to gather information, but couldn't generate a proper summary."

    except Exception as exc:
        logger.error(f"Error generating final answer: {exc}")
        return f"I executed {len(executed_steps)} queries but encountered an error when summarizing: {str(exc)}"
class PuppyGraphChatbot:
    """Main chatbot backend that coordinates MCP server, RAG system, and PuppyGraph"""

    def __init__(self,
                 puppygraph_bolt_uri: str = "bolt://localhost:7687",
                 puppygraph_http_uri: str = "http://localhost:8081",
                 puppygraph_username: str = "puppygraph",
                 puppygraph_password: str = "puppygraph123",
                 prompt_config: Optional[PromptConfig] = None):

        # Connection settings: Bolt endpoint for Cypher, HTTP endpoint for schema.
        # NOTE(review): defaults include hard-coded local-dev credentials;
        # override in any non-local deployment.
        self.puppygraph_bolt_uri = puppygraph_bolt_uri
        self.puppygraph_http_uri = puppygraph_http_uri
        self.puppygraph_username = puppygraph_username
        self.puppygraph_password = puppygraph_password

        # Initialize RAG system with optional prompt configuration
        self.rag_system = TextToCypherRAG(prompt_config=prompt_config)

        # MCP server process (None until start_mcp_server() is called)
        self.mcp_process = None

        # Cache for schema and frequently used data
        self.schema_cache = None
        self.schema_cache_time = 0
        self.cache_duration = 300  # 5 minutes

        # Conversation history
        self.conversation_history: List[Dict[str, Any]] = []
async def start_mcp_server(self):
    """Start the MCP server subprocess with PuppyGraph connection settings.

    Returns:
        bool: True if the process is still alive ~2 seconds after spawning,
        False if it exited early or could not be started.
    """
    import os  # local import keeps this fix self-contained in the function

    try:
        # BUG FIX: previously only the four PUPPYGRAPH_* variables were passed
        # as the child's ENTIRE environment, dropping PATH/PYTHONPATH etc.,
        # which can make the bare "python" executable unresolvable and breaks
        # any library relying on standard env vars. Extend a copy of the
        # parent environment instead of replacing it.
        env = {
            **os.environ,
            "PUPPYGRAPH_BOLT_URI": self.puppygraph_bolt_uri,
            "PUPPYGRAPH_HTTP_URI": self.puppygraph_http_uri,
            "PUPPYGRAPH_USERNAME": self.puppygraph_username,
            "PUPPYGRAPH_PASSWORD": self.puppygraph_password
        }

        self.mcp_process = subprocess.Popen(
            ["python", "mcp_server.py"],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            env=env
        )

        # Give it a moment to start
        await asyncio.sleep(2)

        if self.mcp_process.poll() is None:
            logger.info("MCP server started successfully")
            return True
        logger.error("MCP server failed to start")
        return False

    except Exception as e:
        logger.error(f"Error starting MCP server: {e}")
        return False


def stop_mcp_server(self):
    """Terminate the MCP server process if it is still running."""
    if self.mcp_process and self.mcp_process.poll() is None:
        self.mcp_process.terminate()
        self.mcp_process.wait()
        logger.info("MCP server stopped")


def get_schema(self) -> Dict[str, Any]:
    """Fetch the graph schema from PuppyGraph's HTTP endpoint, with caching.

    Serves a cached copy while it is fresher than ``cache_duration`` seconds;
    on fetch failure falls back to the stale cache, then to an empty schema.
    """
    current_time = time.time()

    # Return cached schema if still valid
    if (self.schema_cache and
            current_time - self.schema_cache_time < self.cache_duration):
        return self.schema_cache

    try:
        response = requests.get(
            f"{self.puppygraph_http_uri}/schemajson",
            auth=(self.puppygraph_username, self.puppygraph_password),
            timeout=10
        )
        response.raise_for_status()

        raw_schema = response.json()

        # Convert PuppyGraph schema format to our expected format
        converted_schema = self._convert_puppygraph_schema(raw_schema)

        self.schema_cache = converted_schema
        self.schema_cache_time = current_time

        return self.schema_cache

    except Exception as e:
        logger.error(f"Error fetching schema: {e}")
        # Return cached schema if available, otherwise empty schema
        return self.schema_cache or {"vertices": [], "edges": []}
schema 115 | return self.schema_cache or {"vertices": [], "edges": []} 116 | 117 | def _convert_puppygraph_schema(self, raw_schema: Dict[str, Any]) -> Dict[str, Any]: 118 | """Convert PuppyGraph schema format to our expected format based on graph_schema.proto""" 119 | 120 | try: 121 | # Extract graph definition (non-deprecated format) 122 | graph_def = raw_schema.get("graph", {}) 123 | 124 | # Convert vertices from Graph.VertexSchema format 125 | vertices = [] 126 | for vertex in graph_def.get("vertices", []): 127 | converted_vertex = { 128 | "label": vertex.get("label", "Unknown"), 129 | "attributes": [], 130 | "description": vertex.get("description", "") 131 | } 132 | 133 | # Handle OneToOne mapping (most common) 134 | one_to_one = vertex.get("oneToOne", {}) 135 | if one_to_one: 136 | # Extract attributes from MappedField format 137 | attributes = one_to_one.get("attributes", []) 138 | for attr in attributes: 139 | converted_vertex["attributes"].append({ 140 | "name": attr.get("alias", attr.get("field", "unknown")), 141 | "type": self._map_puppygraph_type(attr.get("type", "String")) 142 | }) 143 | 144 | # Handle ManyToOne mapping if present 145 | many_to_one = vertex.get("manyToOne", {}) 146 | if many_to_one: 147 | # For ManyToOne, we'll just show it has complex mapping 148 | converted_vertex["attributes"].append({ 149 | "name": "complex_mapping", 150 | "type": "ManyToOne" 151 | }) 152 | 153 | vertices.append(converted_vertex) 154 | 155 | # Convert edges from Graph.EdgeSchema format 156 | edges = [] 157 | for edge in graph_def.get("edges", []): 158 | converted_edge = { 159 | "label": edge.get("label", "Unknown"), 160 | "from": edge.get("fromVertex", "Unknown"), 161 | "to": edge.get("toVertex", "Unknown"), 162 | "attributes": [], 163 | "description": edge.get("description", "") 164 | } 165 | 166 | # Extract attributes from MappedField format 167 | attributes = edge.get("attributes", []) 168 | for attr in attributes: 169 | converted_edge["attributes"].append({ 170 
| "name": attr.get("alias", attr.get("field", "unknown")), 171 | "type": self._map_puppygraph_type(attr.get("type", "String")) 172 | }) 173 | 174 | edges.append(converted_edge) 175 | 176 | return { 177 | "vertices": vertices, 178 | "edges": edges 179 | } 180 | 181 | except Exception as e: 182 | logger.error(f"Error converting PuppyGraph schema: {e}") 183 | return {"vertices": [], "edges": []} 184 | 185 | def _map_puppygraph_type(self, puppygraph_type: str) -> str: 186 | """Map PuppyGraph types to standard types""" 187 | type_mapping = { 188 | "String": "String", 189 | "Int": "Integer", 190 | "Double": "Double", 191 | "Boolean": "Boolean", 192 | "Long": "Long", 193 | "Float": "Float" 194 | } 195 | return type_mapping.get(puppygraph_type, "String") 196 | 197 | def execute_cypher_direct(self, query: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: 198 | """Execute Cypher query directly against PuppyGraph""" 199 | from neo4j import GraphDatabase 200 | from neo4j.exceptions import ServiceUnavailable, AuthError 201 | 202 | try: 203 | driver = GraphDatabase.driver( 204 | self.puppygraph_bolt_uri, 205 | auth=(self.puppygraph_username, self.puppygraph_password) 206 | ) 207 | 208 | with driver.session() as session: 209 | result = session.run(query, params or {}) 210 | records = [record.data() for record in result] 211 | 212 | driver.close() 213 | 214 | return { 215 | "success": True, 216 | "data": records, 217 | "query": query, 218 | "record_count": len(records) 219 | } 220 | 221 | except ServiceUnavailable as e: 222 | error_msg = f"PuppyGraph server not available at {self.puppygraph_bolt_uri}. Please ensure PuppyGraph is running." 223 | logger.error(error_msg) 224 | return { 225 | "success": False, 226 | "error": error_msg, 227 | "query": query 228 | } 229 | except AuthError as e: 230 | error_msg = f"Authentication failed. Check PuppyGraph credentials." 
def process_natural_language_query_streaming(self, question: str):
    """Process a natural language question with streaming progress updates.

    Generator: yields formatted markdown progress strings for the UI.
    Runs up to 5 LLM-driven query rounds; stops early when the RAG system
    decides it has enough information, otherwise forces a summary after
    the last round.  Side effects: appends to self.conversation_history
    and to the RAG system's conversation_messages.
    """

    try:
        # Get current schema
        schema = self.get_schema()

        # Initialize progress
        executed_steps = []
        max_rounds = 5

        # Yield initial status
        yield self._format_streaming_update("🤖 Starting multi-round query execution...", executed_steps, None, question)

        for round_num in range(1, max_rounds + 1):
            # Generate next query or decision to stop
            yield self._format_streaming_update(f"🔄 Round {round_num}: Analyzing question and generating query...", executed_steps, None, question)

            cypher, explanation, should_stop, prompt, llm_response = self.rag_system.generate_next_query(
                question, schema, executed_steps, max_rounds
            )

            if should_stop:
                # Final answer ready; `explanation` holds the final answer here
                final_result = {
                    "question": question,
                    "executed_steps": executed_steps,
                    "final_answer": explanation,
                    "total_rounds": round_num - 1,
                    "success": True,
                    "stopped_reason": "LLM determined sufficient information gathered"
                }
                self.conversation_history.append(final_result)
                yield self._format_streaming_update("✅ Analysis complete!", executed_steps, explanation, question, final=True)
                return

            # Show generated query
            yield self._format_streaming_update(f"📝 Generated query for round {round_num}", executed_steps, None, question, current_query=cypher, current_description=explanation, current_prompt=prompt, current_llm_response=llm_response)

            # Create query step
            from rag_system import QueryStep
            query_step = QueryStep(
                step_number=round_num,
                description=explanation,
                cypher=cypher,
                prompt=prompt,
                llm_response=llm_response
            )

            # Execute the query
            yield self._format_streaming_update(f"⚡ Executing query {round_num}...", executed_steps, None, question, current_query=cypher)

            try:
                execution_result = self.execute_cypher_direct(cypher)
                if execution_result.get("success", False):
                    query_step.result = execution_result.get("data", [])
                    result_summary = f"✅ Query {round_num} completed: {len(query_step.result)} records returned"
                else:
                    query_step.error = execution_result.get("error", "Unknown error")
                    result_summary = f"❌ Query {round_num} failed: {query_step.error}"
            except Exception as e:
                query_step.error = str(e)
                result_summary = f"❌ Query {round_num} error: {str(e)}"

            executed_steps.append(query_step)

            # Add query result to RAG system conversation history
            # (truncated to 5 sample records to keep the LLM context small)
            if query_step.result is not None:
                total_records = len(query_step.result)
                rag_result_summary = f"Query executed successfully. Found {total_records} records."
                if query_step.result:
                    sample = query_step.result[:5]  # Show first 5 records as sample
                    rag_result_summary += f" Sample data (showing {len(sample)} of {total_records}): {json.dumps(sample, default=str)}"
                    if total_records > 5:
                        rag_result_summary += f" (Note: {total_records - 5} additional records omitted)"
            elif query_step.error:
                rag_result_summary = f"Query failed with error: {query_step.error}"
            else:
                rag_result_summary = "Query executed with no result data."

            self.rag_system.conversation_messages.append({
                "role": "assistant",
                "content": f"I executed this query: {query_step.cypher}\nResult: {rag_result_summary}"
            })

            # Show execution result
            yield self._format_streaming_update(result_summary, executed_steps, None, question)

        # If we reach max rounds, force stop
        final_answer = self.rag_system.generate_final_answer_from_steps(question, executed_steps)
        final_result = {
            "question": question,
            "executed_steps": executed_steps,
            "final_answer": final_answer,
            "total_rounds": max_rounds,
            "success": True,
            "stopped_reason": f"Reached maximum rounds ({max_rounds})"
        }
        self.conversation_history.append(final_result)
        yield self._format_streaming_update("🛑 Maximum rounds reached", executed_steps, final_answer, question, final=True)

    except Exception as e:
        logger.error(f"Error in streaming processing: {e}")
        yield f"❌ Error: {str(e)}"
def _format_streaming_update(self, status: str, executed_steps, final_answer: str = None, question: str = "", final: bool = False, current_query: str = None, current_description: str = None, current_prompt: str = None, current_llm_response: str = None) -> str:
    """Format a streaming progress update in chronological order.

    Two layouts: when ``final`` with a ``final_answer``, the answer is shown
    first and all processing detail is collapsed; otherwise a running view of
    completed steps plus the in-progress step is rendered.
    NOTE(review): the <details>/<summary> HTML tags inside the f-strings were
    garbled in extraction and are reconstructed here - verify against the
    original file.
    """

    response = f"**Question:** {question}\n\n"

    # If we have a final answer, show it prominently first and collapse the details
    if final_answer and final:
        response += f"**🎯 Final Answer:**\n{final_answer}\n\n"
        response += f"---\n\n"

        # Put all processing details in a collapsible section
        details_content = f"**Status:** {status}\n\n"

        # System configuration in details
        system_prompt = self.rag_system.get_system_prompt()
        if system_prompt:
            details_content += f"**🔧 System Configuration:**\n"
            details_content += f"<details>\n<summary>View System Prompt (Schema, Rules, Examples)</summary>\n\n```\n{system_prompt}\n```\n</details>\n\n"

        # Processing steps in details
        if executed_steps:
            details_content += f"**Processing Steps ({len(executed_steps)}):**\n\n"
            for step in executed_steps:
                details_content += f"---\n**Step {step.step_number}:** {step.description}\n\n"

                # LLM request for this step
                if hasattr(step, 'prompt') and step.prompt:
                    details_content += f"**🤖 LLM Request:**\n```\n{step.prompt}\n```\n\n"

                # LLM response (tool use)
                if hasattr(step, 'llm_response') and step.llm_response:
                    details_content += f"**🤖 LLM Response:**\n```json\n{step.llm_response}\n```\n\n"

                # Generated Cypher query
                details_content += f"**🔗 Cypher Query:**\n```cypher\n{step.cypher}\n```\n\n"

                # Query execution results
                if step.result is not None:
                    details_content += f"**📊 Query Results:** ✅ {len(step.result)} records returned\n"
                    if step.result:
                        sample_size = min(5, len(step.result))
                        details_content += f"**Sample Data (showing {sample_size} of {len(step.result)} records):**\n"
                        details_content += f"```json\n{json.dumps(step.result[:sample_size], indent=2, default=str)}\n```\n"
                        if len(step.result) > sample_size:
                            details_content += f"... and {len(step.result) - sample_size} more records\n"
                    details_content += "\n"
                elif step.error:
                    details_content += f"**Query Error:** ❌ {step.error}\n\n"

                details_content += "\n"

        # Wrap all details in a collapsible section
        response += f"<details>\n<summary>📋 Click to view detailed processing steps ({len(executed_steps)} queries executed)</summary>\n\n{details_content}\n</details>\n"

        return response

    # If not final, show the regular streaming format
    response += f"**Status:** {status}\n\n"

    # 1. System prompt (always first)
    system_prompt = self.rag_system.get_system_prompt()
    if system_prompt:
        response += f"**🔧 System Configuration:**\n"
        response += f"<details>\n<summary>View System Prompt (Schema, Rules, Examples)</summary>\n\n```\n{system_prompt}\n```\n</details>\n\n"

    # 2. Initial user query (what started everything)
    response += f"**🧑 User Query:** {question}\n\n"

    # 3. Show completed steps in chronological order
    if executed_steps:
        response += f"**Processing Steps ({len(executed_steps)}):**\n\n"
        for step in executed_steps:
            response += f"---\n**Step {step.step_number}:** {step.description}\n\n"

            # 3a. LLM request for this step
            if hasattr(step, 'prompt') and step.prompt:
                response += f"**🤖 LLM Request:**\n```\n{step.prompt}\n```\n\n"

            # 3b. LLM response (tool use)
            if hasattr(step, 'llm_response') and step.llm_response:
                response += f"**🤖 LLM Response:**\n```json\n{step.llm_response}\n```\n\n"

            # 3c. Generated Cypher query
            response += f"**🔗 Cypher Query:**\n```cypher\n{step.cypher}\n```\n\n"

            # 3d. Query execution results
            if step.result is not None:
                response += f"**📊 Query Results:** ✅ {len(step.result)} records returned\n"
                if step.result:
                    sample_size = min(5, len(step.result))
                    response += f"**Sample Data (showing {sample_size} of {len(step.result)} records):**\n"
                    response += f"```json\n{json.dumps(step.result[:sample_size], indent=2, default=str)}\n```\n"
                    if len(step.result) > sample_size:
                        response += f"... and {len(step.result) - sample_size} more records\n"
                response += "\n"
            elif step.error:
                response += f"**Query Error:** ❌ {step.error}\n\n"

            response += "\n"

    # 4. Show current step in progress (if any)
    if current_query and not final:
        step_num = len(executed_steps) + 1
        response += f"---\n**Step {step_num} (In Progress):** {current_description or 'Processing...'}\n\n"

        # 4a. LLM request for current step
        if current_prompt:
            response += f"**🤖 LLM Request:**\n```\n{current_prompt}\n```\n\n"

        # 4b. LLM response for current step
        if current_llm_response:
            response += f"**🤖 LLM Response:**\n```json\n{current_llm_response}\n```\n\n"

        # 4c. Generated Cypher query
        response += f"**🔗 Cypher Query:**\n```cypher\n{current_query}\n```\n\n"
        response += f"**📊 Status:** Executing query...\n\n"

    return response
LLM response for current step 460 | if current_llm_response: 461 | response += f"**🤖 LLM Response:**\n```json\n{current_llm_response}\n```\n\n" 462 | 463 | # 4c. Generated Cypher query 464 | response += f"**🔗 Cypher Query:**\n```cypher\n{current_query}\n```\n\n" 465 | response += f"**📊 Status:** Executing query...\n\n" 466 | 467 | return response 468 | 469 | def add_query_example(self, question: str, cypher: str, description: str) -> bool: 470 | """Add a new query example to the RAG system""" 471 | try: 472 | example = QueryExample( 473 | question=question, 474 | cypher=cypher, 475 | description=description 476 | ) 477 | self.rag_system.add_example(example) 478 | return True 479 | except Exception as e: 480 | logger.error(f"Error adding query example: {e}") 481 | return False 482 | 483 | def get_conversation_history(self, limit: int = 10) -> List[Dict[str, Any]]: 484 | """Get recent conversation history""" 485 | return self.conversation_history[-limit:] if self.conversation_history else [] 486 | 487 | def clear_conversation_history(self): 488 | """Clear conversation history""" 489 | self.conversation_history.clear() 490 | # Also clear the RAG system's conversation history 491 | self.rag_system.clear_conversation() 492 | 493 | def update_prompt_config(self, prompt_config: PromptConfig): 494 | """Update the prompt configuration for the RAG system""" 495 | self.rag_system.update_prompt_config(prompt_config) 496 | logger.info("Chatbot prompt configuration updated") 497 | 498 | def get_prompt_config(self) -> PromptConfig: 499 | """Get the current prompt configuration""" 500 | return self.rag_system.get_prompt_config() 501 | 502 | def get_graph_stats(self) -> Dict[str, Any]: 503 | """Get basic graph statistics""" 504 | try: 505 | stats_query = """ 506 | MATCH (n) 507 | WITH count(n) as node_count 508 | MATCH ()-[r]->() 509 | RETURN node_count, count(r) as edge_count 510 | """ 511 | 512 | result = self.execute_cypher_direct(stats_query) 513 | 514 | if result["success"] 
and result["data"]: 515 | stats = result["data"][0] 516 | 517 | # Get node labels and relationship types separately 518 | node_labels = [] 519 | relationship_types = [] 520 | 521 | try: 522 | # Try to get distinct node labels 523 | labels_result = self.execute_cypher_direct("MATCH (n) RETURN DISTINCT labels(n) as node_labels LIMIT 20") 524 | if labels_result["success"]: 525 | node_labels = [item["node_labels"][0] for item in labels_result["data"] if item.get("node_labels")] 526 | except: 527 | pass 528 | 529 | try: 530 | # Try to get distinct relationship types 531 | types_result = self.execute_cypher_direct("MATCH ()-[r]->() RETURN DISTINCT type(r) as relationship_type LIMIT 20") 532 | if types_result["success"]: 533 | relationship_types = [item["relationship_type"] for item in types_result["data"]] 534 | except: 535 | pass 536 | 537 | return { 538 | "node_count": stats.get("node_count", 0), 539 | "edge_count": stats.get("edge_count", 0), 540 | "node_labels": node_labels, 541 | "relationship_types": relationship_types 542 | } 543 | else: 544 | return {"node_count": 0, "edge_count": 0, "node_labels": [], "relationship_types": []} 545 | 546 | except Exception as e: 547 | logger.error(f"Error getting graph stats: {e}") 548 | return {"error": str(e)} 549 | 550 | def cleanup(self): 551 | """Cleanup resources""" 552 | self.stop_mcp_server() 553 | 554 | 555 | # Global chatbot instance 556 | chatbot = None 557 | 558 | def get_chatbot() -> PuppyGraphChatbot: 559 | """Get or create the global chatbot instance""" 560 | global chatbot 561 | if chatbot is None: 562 | chatbot = PuppyGraphChatbot() 563 | return chatbot 564 | 565 | def shutdown_chatbot(): 566 | """Shutdown the global chatbot instance""" 567 | global chatbot 568 | if chatbot: 569 | chatbot.cleanup() 570 | chatbot = None -------------------------------------------------------------------------------- /apps/chatbot/gradio_app.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python3
"""Gradio front-end for the PuppyGraph RAG chatbot.

Wires the natural-language chat UI to the backend chatbot instance and
renders graph statistics / schema information as formatted text.
"""

import gradio as gr
import json
import time
from typing import List, Tuple, Dict, Any
import logging

from backend import get_chatbot, shutdown_chatbot
from rag_system import PromptConfig

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("gradio_app")

# Global variables for maintaining state
chatbot_instance = None


def initialize_chatbot():
    """Return the process-wide chatbot backend, creating it on first use.

    The instance is cached in the module-level ``chatbot_instance`` global so
    every UI callback shares one backend (and one conversation history).
    """
    global chatbot_instance
    if chatbot_instance is None:
        chatbot_instance = get_chatbot()
    return chatbot_instance


def process_message_streaming(message: str, history: List[Dict[str, str]]):
    """Process a user message, yielding incremental chat-history updates.

    Generator used as a streaming Gradio event handler: each ``yield`` of the
    mutated ``history`` list re-renders the Chatbot component.

    Args:
        message: Raw user input; blank/whitespace-only input is ignored.
        history: Chat history in Gradio "messages" format
            (dicts with ``role``/``content`` keys), mutated in place.
    """
    if not message.strip():
        return

    # Tracks whether the assistant placeholder was appended, so the error
    # path knows whether history[-1] is ours to overwrite.  Without this,
    # a failure inside initialize_chatbot() would clobber the previous
    # message in the conversation instead of reporting the error.
    placeholder_added = False
    try:
        chatbot = initialize_chatbot()

        # Add user message to history with empty response initially
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": ""})
        placeholder_added = True
        yield history

        # Stream the processing: each update is the full response so far,
        # so we replace (not append to) the assistant placeholder.
        full_response = ""
        for update in chatbot.process_natural_language_query_streaming(message):
            full_response = update
            # Update the last assistant message in history
            history[-1] = {"role": "assistant", "content": full_response}
            yield history

    except Exception as e:
        error_response = f"❌ Error processing your message: {str(e)}"
        if placeholder_added:
            history[-1] = {"role": "assistant", "content": error_response}
        else:
            # Failure happened before any message was appended for this turn;
            # append the error rather than overwrite an unrelated entry.
            history.append({"role": "assistant", "content": error_response})
        yield history


def get_graph_stats() -> str:
    """Fetch graph statistics from the backend and format them as markdown.

    Returns:
        A markdown string with node/edge counts, labels and relationship
        types, or an ``❌``-prefixed error message on failure.
    """
    try:
        chatbot = initialize_chatbot()
        stats = chatbot.get_graph_stats()

        # Backend signals failure via an "error" key rather than raising.
        if "error" in stats:
            return f"❌ Error getting stats: {stats['error']}"

        stats_text = f"""
📊 **Graph Statistics**

🔢 **Nodes:** {stats.get('node_count', 0)}
🔗 **Edges:** {stats.get('edge_count', 0)}

🏷️ **Node Labels:** {', '.join(stats.get('node_labels', []))}
⚡ **Relationship Types:** {', '.join(stats.get('relationship_types', []))}
"""

        return stats_text.strip()

    except Exception as e:
        return f"❌ Error: {str(e)}"


def get_schema_info() -> str:
    """Fetch the graph schema from the backend and format it as markdown.

    Renders vertices and edges (with their attributes) as bullet lists, and
    produces explanatory warnings when the schema is empty or unreachable.

    Returns:
        A markdown string describing the schema, or a diagnostic message.
    """
    try:
        chatbot = initialize_chatbot()
        schema = chatbot.get_schema()

        if not schema:
            return "📋 **Graph Schema**\n\n⚠️ No schema information available. Please ensure PuppyGraph is running and has a configured schema."

        schema_text = "📋 **Graph Schema**\n\n"

        # Format vertices
        vertices = schema.get("vertices", [])
        edges = schema.get("edges", [])

        if not vertices and not edges:
            schema_text += "⚠️ **No schema found**\n\n"
            schema_text += "This could mean:\n"
            schema_text += "• PuppyGraph is not running\n"
            schema_text += "• No schema has been configured in PuppyGraph\n"
            schema_text += "• Connection to PuppyGraph failed\n\n"
            schema_text += "Please check your PuppyGraph server status and configuration."
            return schema_text

        if vertices:
            schema_text += "🟢 **Vertices:**\n"
            for vertex in vertices:
                label = vertex.get("label", "Unknown")
                attributes = vertex.get("attributes", [])
                if attributes:
                    # Assumes each attribute dict has 'name' and 'type' keys —
                    # matches the backend schema shape; TODO confirm.
                    attr_text = ", ".join([f"{attr['name']}:{attr['type']}" for attr in attributes])
                    schema_text += f"  • **{label}**: {attr_text}\n"
                else:
                    schema_text += f"  • **{label}**: (no attributes)\n"
            schema_text += "\n"

        if edges:
            schema_text += "🔗 **Edges:**\n"
            for edge in edges:
                label = edge.get("label", "Unknown")
                from_vertex = edge.get("from", "Unknown")
                to_vertex = edge.get("to", "Unknown")
                attributes = edge.get("attributes", [])
                if attributes:
                    attr_text = ", ".join([f"{attr['name']}:{attr['type']}" for attr in attributes])
                    schema_text += f"  • **{from_vertex}** -[{label}]-> **{to_vertex}**: {attr_text}\n"
                else:
                    schema_text += f"  • **{from_vertex}** -[{label}]-> **{to_vertex}**: (no attributes)\n"

        # One-sided schemas are unusual enough to flag explicitly.
        if not vertices and edges:
            schema_text += "\n⚠️ **Note**: Found edge definitions but no vertex definitions."
        elif vertices and not edges:
            schema_text += "\n⚠️ **Note**: Found vertex definitions but no edge definitions."

        return schema_text

    except Exception as e:
        return f"📋 **Graph Schema**\n\n❌ Error getting schema: {str(e)}\n\nPlease check that PuppyGraph is running and accessible."
def start_new_session() -> list:
    """Start a new session by clearing chat and conversation history.

    Returns an empty list so it can be wired directly to the Chatbot
    component's output (clearing the rendered chat) — even on failure.
    """
    try:
        chatbot = initialize_chatbot()
        chatbot.clear_conversation_history()
        logger.info("New session started - conversation history cleared")
        return []
    except Exception as e:
        logger.error(f"Error starting new session: {e}")
        return []


def add_example_query(question: str, cypher: str, description: str) -> str:
    """Add a new example query to the RAG system.

    Args:
        question: Natural-language question (required, non-blank).
        cypher: Corresponding Cypher query (required, non-blank).
        description: Optional free-text description; a default is used if empty.

    Returns:
        A human-readable status string for display in the UI.
    """
    if not question.strip() or not cypher.strip():
        return "❌ Question and Cypher query are required"

    try:
        chatbot = initialize_chatbot()
        success = chatbot.add_query_example(question, cypher, description or "User-added example")

        if success:
            return "✅ Example added successfully to the knowledge base"
        else:
            return "❌ Failed to add example"

    except Exception as e:
        return f"❌ Error adding example: {str(e)}"


def get_current_prompt_config() -> Tuple[str, str, str, str]:
    """Get the current prompt configuration components.

    Returns the four PromptConfig fields in the order expected by the
    Prompt Config tab's textboxes; on error, the same error string is
    returned for all four slots so every field shows the problem.
    """
    try:
        chatbot = initialize_chatbot()
        config = chatbot.get_prompt_config()
        return (
            config.role_definition,
            config.plan_generation_instruction,
            config.puppygraph_differences,
            config.output_format_instruction
        )
    except Exception as e:
        error_msg = f"Error getting config: {str(e)}"
        return error_msg, error_msg, error_msg, error_msg


def update_prompt_config(role_def: str, plan_gen: str, puppygraph_diff: str, output_format: str) -> str:
    """Update the prompt configuration.

    Blank/whitespace-only fields fall back to the hard-coded defaults below
    (mining-site domain wording) rather than clearing the corresponding
    PromptConfig field.

    Returns:
        A human-readable status string for display in the UI.
    """
    try:
        chatbot = initialize_chatbot()

        # Create new config with updated values
        new_config = PromptConfig(
            role_definition=role_def.strip() or """You are a helpful assistant to help answer user questions about assets in a mining site.
You will need to use the information stored in the graph database to answer the user's questions.
Here is some information about the graph database schema.""",
            plan_generation_instruction=plan_gen.strip() or """You must first output a PLAN, then you can use the PLAN to call the tools.
Each STEP of the PLAN should be corresponding to one or more function calls (but not less), either simple or complex.
Minimize the number of steps in the PLAN, but make sure the PLAN is workable.
Remember, each step can be converted to a Cypher query, since Cypher query can handle quite complex queries,
each step can be complex as well as long as it can be converted to a Cypher query.

IMPORTANT RESULT HANDLING STRATEGY:
- If your query results are truncated (you see "[Results truncated...]"), you have several options:
1. Use a smaller LIMIT size to get a sample of results first for exploration
2. Add COUNT(*) queries to understand total result sizes before fetching data
3. For final comprehensive results, remove LIMIT clauses entirely to provide complete downloadable data
- When providing final conclusions to users, ensure the last query retrieves complete data (no LIMIT) for download
- Structure your approach: exploration -> understanding -> comprehensive final result""",
            puppygraph_differences=puppygraph_diff.strip() or """PUPPYGRAPH DIFFERENCES FROM STANDARD CYPHER:
When calculating failures for a particular asset, also first find out the work orders that are related to the asset,
then count the work orders that are related to the failure using related_to_failure.
DO NOT USE can_have_failure for counting total number of failures, USE related_to_failure instead.""",
            output_format_instruction=output_format.strip() or """OUTPUT FORMAT:
Always use the format {
'THINKING': ,
'PLAN': ,
'CONCLUSION': ,
'FINAL_DATA_AVAILABLE': ,
'QUERY_EXECUTION_SUMMARY': }

RESULT MANAGEMENT GUIDELINES:
- For exploratory queries, use appropriate LIMIT clauses (10-50 records)
- For final results intended for user download, use NO LIMIT to provide complete data
- Always inform users when data is available for download
- Include query execution summary to help users understand what was analyzed"""
        )

        chatbot.update_prompt_config(new_config)
        return "✅ Prompt configuration updated successfully! The new settings will be used for future queries."

    except Exception as e:
        return f"❌ Error updating prompt configuration: {str(e)}"


def reset_prompt_config() -> Tuple[str, str, str, str, str]:
    """Reset prompt configuration to defaults.

    Applies a fresh default PromptConfig to the backend and returns its four
    fields plus a status message, matching the five outputs wired to the
    reset button. On error, the four field slots come back empty.
    """
    try:
        chatbot = initialize_chatbot()
        default_config = PromptConfig()  # Create with defaults
        chatbot.update_prompt_config(default_config)

        return (
            default_config.role_definition,
            default_config.plan_generation_instruction,
            default_config.puppygraph_differences,
            default_config.output_format_instruction,
            "✅ Prompt configuration reset to defaults!"
        )
    except Exception as e:
        error_msg = f"❌ Error resetting config: {str(e)}"
        return "", "", "", "", error_msg


def create_interface() -> gr.Blocks:
    """Create the Gradio interface.

    Builds the full tabbed UI (Chat, Graph Info, Add Examples, Prompt Config,
    Debug/Prompts, Help) and wires every event handler. Component creation
    order matters: components are attached to the enclosing ``gr.Blocks``
    context as they are constructed.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` interface.
    """

    # Custom CSS for better styling
    css = """
    .gradio-container {
        max-width: 1200px !important;
    }
    .chat-message {
        font-family: 'Courier New', monospace;
    }
    .stats-display {
        background-color: #f0f0f0;
        padding: 10px;
        border-radius: 5px;
        font-family: monospace;
    }
    """

    with gr.Blocks(css=css, title="PuppyGraph RAG Chat") as interface:

        gr.Markdown("""
        # 🐶 PuppyGraph RAG Chatbot

        Ask questions about your graph in natural language! Watch in **real-time** as I analyze your question, generate and execute multiple Cypher queries, and build a comprehensive answer step by step.

        **🆕 Optimized Conversation System:** Now uses efficient context management:
        - **System prompt**: Schema, rules, and examples defined once per session
        - **Conversation history**: Maintains context across questions without repetition
        - **Full transparency**: Complete prompts, responses, queries, and results
        - **Efficient prompting**: Only dynamic content sent to LLM, reducing costs
        - **Multi-round execution**: Context-aware query generation

        **Examples to try:**
        - "Show me all nodes in the graph"
        - "Count all relationships"
        - "What are the different types of nodes?"
        - "Find highly connected nodes"
        - "Which users have the most connections and what do they connect to?"
        - "What percentage of nodes have more than 5 relationships?"
        """)

        with gr.Tab("💬 Chat"):
            gr.Markdown("""
            ### Chat with PuppyGraph 🔄
            **Full conversation transparency enabled** - See every prompt, response, query, and result in detail
            """)

            # type="messages" pairs with process_message_streaming's
            # role/content dict format; sanitize_html=False lets the
            # backend's markdown/HTML render as-is.
            chatbot_ui = gr.Chatbot(
                value=[],
                height=600,
                label="PuppyGraph Assistant (Full Conversation Details)",
                show_label=True,
                elem_classes=["chat-message"],
                type="messages",
                render_markdown=True,
                sanitize_html=False
            )

            msg = gr.Textbox(
                placeholder="Ask me anything about your graph...",
                label="Your Question",
                lines=2
            )

            with gr.Row():
                submit_btn = gr.Button("Send", variant="primary", size="sm")
                clear_btn = gr.Button("🔄 New Session", size="sm", variant="stop")

            # Event handlers for chat (always streaming); Enter key and the
            # Send button trigger the same generator handler.
            msg.submit(
                process_message_streaming,
                inputs=[msg, chatbot_ui],
                outputs=[chatbot_ui]
            ).then(
                lambda: "", outputs=[msg]  # Clear input after submission
            )

            submit_btn.click(
                process_message_streaming,
                inputs=[msg, chatbot_ui],
                outputs=[chatbot_ui]
            ).then(
                lambda: "", outputs=[msg]  # Clear input after submission
            )

            clear_btn.click(
                start_new_session,
                outputs=[chatbot_ui]
            )

        with gr.Tab("📊 Graph Info"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown("### Graph Statistics")
                    stats_display = gr.Textbox(
                        label="Current Stats",
                        lines=8,
                        interactive=False,
                        elem_classes=["stats-display"]
                    )
                    stats_btn = gr.Button("Refresh Stats", variant="secondary")

                with gr.Column():
                    gr.Markdown("### Schema Information")
                    schema_display = gr.Textbox(
                        label="Graph Schema",
                        lines=8,
                        interactive=False,
                        elem_classes=["stats-display"]
                    )
                    schema_btn = gr.Button("Refresh Schema", variant="secondary")

            # Event handlers for info tab
            stats_btn.click(
                get_graph_stats,
                outputs=[stats_display]
            )

            schema_btn.click(
                get_schema_info,
                outputs=[schema_display]
            )

        with gr.Tab("➕ Add Examples"):
            gr.Markdown("""
            ### Add Query Examples
            Help improve the chatbot by adding your own question-to-Cypher examples!
            """)

            example_question = gr.Textbox(
                label="Natural Language Question",
                placeholder="e.g., 'Find all users who like movies'",
                lines=2
            )

            example_cypher = gr.Textbox(
                label="Corresponding Cypher Query",
                placeholder="e.g., 'MATCH (u:User)-[:LIKES]->(m:Movie) RETURN u, m LIMIT 20'",
                lines=3
            )

            example_description = gr.Textbox(
                label="Description (optional)",
                placeholder="Brief description of what this query does",
                lines=1
            )

            add_example_btn = gr.Button("Add Example", variant="primary")
            example_result = gr.Textbox(
                label="Result",
                interactive=False,
                lines=2
            )

            # Event handler for adding examples
            add_example_btn.click(
                add_example_query,
                inputs=[example_question, example_cypher, example_description],
                outputs=[example_result]
            )

        with gr.Tab("⚙️ Prompt Config"):
            gr.Markdown("""
            ### Configure System Prompts
            Customize how the AI assistant behaves by configuring the four key components of the system prompt.
            Changes take effect immediately for new queries.
            """)

            # Role Definition
            with gr.Group():
                gr.Markdown("#### 1️⃣ Role Definition")
                gr.Markdown("Define the AI's role and expertise level.")
                role_definition = gr.Textbox(
                    label="Role Definition",
                    lines=2,
                    placeholder="You are an expert at converting natural language questions to Cypher queries for graph databases."
                )

            # Plan Generation Instruction
            with gr.Group():
                gr.Markdown("#### 2️⃣ Plan Generation Instruction")
                gr.Markdown("Tell the AI to create a plan before generating queries.")
                plan_generation = gr.Textbox(
                    label="Plan Generation Instruction",
                    lines=2,
                    placeholder="First, analyze the question and create a step-by-step plan for generating the appropriate Cypher query."
                )

            # PuppyGraph Differences
            with gr.Group():
                gr.Markdown("#### 3️⃣ PuppyGraph vs Standard Cypher Differences")
                gr.Markdown("Explain how PuppyGraph differs from standard Cypher syntax.")
                puppygraph_differences = gr.Textbox(
                    label="PuppyGraph Differences",
                    lines=4,
                    placeholder="""PUPPYGRAPH DIFFERENCES FROM STANDARD CYPHER:
- PuppyGraph supports standard Cypher syntax
- Use proper node and relationship patterns: (n)-[r]->(m)
- Labels and properties follow Neo4j conventions
- Functions like count(), size(), type() work as expected"""
                )

            # Output Format Instruction
            with gr.Group():
                gr.Markdown("#### 4️⃣ Output Format Instruction")
                gr.Markdown("Define the expected output format and structure.")
                output_format = gr.Textbox(
                    label="Output Format Instruction",
                    lines=4,
                    placeholder="""OUTPUT FORMAT:
Use the generate_cypher_query tool to create a Cypher query that answers this question.
Provide:
1. A complete, valid Cypher query
2. A clear explanation of what the query does
3. Step-by-step reasoning (optional but helpful)"""
                )

            # Action buttons
            with gr.Row():
                load_current_btn = gr.Button("Load Current Config", variant="secondary")
                update_config_btn = gr.Button("Update Configuration", variant="primary")
                reset_config_btn = gr.Button("Reset to Defaults", variant="stop")

            # Result display
            config_result = gr.Textbox(
                label="Status",
                interactive=False,
                lines=2,
                placeholder="Click 'Load Current Config' to see the current settings."
            )

            # Event handlers for prompt config: load fills the four fields,
            # then a chained .then() writes the confirmation message.
            load_current_btn.click(
                get_current_prompt_config,
                outputs=[role_definition, plan_generation, puppygraph_differences, output_format]
            ).then(
                lambda: "✅ Current configuration loaded into form fields above.",
                outputs=[config_result]
            )

            update_config_btn.click(
                update_prompt_config,
                inputs=[role_definition, plan_generation, puppygraph_differences, output_format],
                outputs=[config_result]
            )

            reset_config_btn.click(
                reset_prompt_config,
                outputs=[role_definition, plan_generation, puppygraph_differences, output_format, config_result]
            )

        with gr.Tab("📝 Debug/Prompts"):
            gr.Markdown("### Prompt Debugging")
            gr.Markdown("""
            This tab shows the latest prompt used to generate Cypher queries. This is useful for understanding
            how the RAG system constructs prompts and for debugging query generation issues.
            """)

            with gr.Row():
                with gr.Column():
                    latest_prompt_display = gr.Textbox(
                        label="Latest Prompt Used",
                        lines=12,
                        interactive=False,
                        placeholder="No prompts captured yet. Run a query to see the prompt used."
                    )

                with gr.Column():
                    latest_response_display = gr.Textbox(
                        label="Latest LLM Response",
                        lines=12,
                        interactive=False,
                        placeholder="No LLM responses captured yet. Run a query to see the response."
                    )

            refresh_debug_btn = gr.Button("Refresh Latest Prompt & Response", variant="secondary")

            # Function to get the latest prompt and response from conversation history
            def get_latest_debug_info():
                """Return (prompt, llm_response) from the most recent step.

                Looks at the last conversation entry's executed_steps; step
                objects may lack prompt/llm_response, hence getattr with
                fallbacks. Returns matching placeholder/error text in both
                slots when nothing is available.
                """
                try:
                    chatbot = initialize_chatbot()
                    history = chatbot.get_conversation_history(1)
                    if history and "executed_steps" in history[0]:
                        steps = history[0]["executed_steps"]
                        if steps:
                            latest_step = steps[-1]
                            prompt = getattr(latest_step, 'prompt', None) or "No prompt available"
                            response = getattr(latest_step, 'llm_response', None) or "No LLM response available"
                            return prompt, response
                    return "No debug information available in recent conversation history.", "No debug information available in recent conversation history."
                except Exception as e:
                    error_msg = f"Error retrieving debug info: {str(e)}"
                    return error_msg, error_msg

            refresh_debug_btn.click(
                get_latest_debug_info,
                outputs=[latest_prompt_display, latest_response_display]
            )

        with gr.Tab("ℹ️ Help"):
            gr.Markdown("""
            ## How to Use PuppyGraph RAG Chatbot

            ### 🗣️ Chat Tab
            - Type natural language questions about your graph
            - **Real-time streaming**: Watch each query step execute live as it happens
            - **Multi-round execution**: generates and runs multiple Cypher queries as needed
            - Claude Sonnet 4.0 decides when it has enough information to provide a complete answer
            - **🆕 Full conversation transparency**: See complete details including:
              - Full prompts sent to Claude Sonnet 4.0 with schema and context
              - Complete LLM responses showing reasoning and decision-making
              - All generated Cypher queries with explanations
              - Full query results with detailed data samples
              - Step-by-step progression through the entire conversation

            ### 📊 Graph Info Tab
            - View basic statistics about your graph (node count, edge count, etc.)
            - Explore the schema to understand available node types and relationships

            ### ➕ Add Examples Tab
            - Teach the chatbot new patterns by adding question-query pairs
            - Your examples will be used to improve future query generation

            ### ⚙️ Prompt Config Tab
            - **Customize AI behavior**: Configure the 4 key components of system prompts
            - **Role Definition**: Set the AI's expertise level and domain knowledge
            - **Plan Generation**: Control whether the AI creates execution plans first
            - **PuppyGraph Differences**: Define how PuppyGraph differs from standard Cypher
            - **Output Format**: Specify the expected response structure
            - **Real-time updates**: Changes take effect immediately for new queries
            - **Reset to defaults**: Easily restore original prompt settings

            ### 📝 Debug/Prompts Tab
            - View the exact prompts sent to Claude Sonnet 4.0 for query generation
            - View the raw LLM responses received from Claude
            - Useful for understanding how the RAG system works and debugging issues
            - Prompts include schema info, similar examples, and conversation context

            ### 🔧 Technical Details
            - **Backend**: Python with FastAPI
            - **Graph DB**: PuppyGraph (Cypher queries via Bolt protocol)
            - **RAG System**: ChromaDB + SentenceTransformers + Claude Sonnet 4.0
            - **MCP Integration**: Custom Model Context Protocol server

            ### 💡 Tips
            - Be specific in your questions for better results
            - Check the confidence score - higher scores indicate more reliable queries
            - Use the schema information to understand what data is available
            - Add your own examples to improve performance for your specific use case
            """)

        # Load initial data when interface starts
        interface.load(
            fn=lambda: (get_graph_stats(), get_schema_info()),
            outputs=[stats_display, schema_display]
        )

    return interface


def main() -> None:
    """Main function to run the application.

    Initializes the backend, builds the UI, and launches the Gradio server
    on 0.0.0.0:7860; the backend is shut down in ``finally`` regardless of
    how the server exits.
    """
    try:
        # Initialize the chatbot
        logger.info("Initializing PuppyGraph RAG Chatbot...")
        initialize_chatbot()

        # Create and launch the interface
        interface = create_interface()

        logger.info("Starting Gradio interface...")
        interface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True,
            debug=True
        )

    except KeyboardInterrupt:
        logger.info("Shutting down...")
    except Exception as e:
        logger.error(f"Error running application: {e}")
    finally:
        # Cleanup
        shutdown_chatbot()
        logger.info("Application shutdown complete")


if __name__ == "__main__":
    main()