├── .python-version
├── tests
    ├── __init__.py
    ├── test_tool_schema.py
    ├── test_quick_tool.py
    ├── test_mcp_connectivity.py
    ├── simple_test.py
    ├── test_ast_grep.py
    ├── mcp_tools_test.py
    ├── test_mcp_tools.py
    ├── test_mcp_server.py
    ├── test_multi_language.py
    ├── test_rustworkx_performance.py
    ├── test_mcp_rustworkx_integration.py
    └── test_rustworkx_graph.py
├── .claude
    └── settings.local.json
├── .mcp.json
├── src
    └── code_graph_mcp
    │   ├── __init__.py
    │   ├── file_watcher.py
    │   ├── universal_graph.py
    │   └── universal_ast.py
├── .gitignore
├── LICENSE
├── pyproject.toml
├── README.md
└── CHANGELOG.md


/.python-version:
--------------------------------------------------------------------------------
1 | 3.12
2 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Test suite for Code Graph MCP Server
3 | Comprehensive testing for all MCP tools and functionality
4 | """
5 | 


--------------------------------------------------------------------------------
/.claude/settings.local.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "permissions": {
 3 |     "allow": [
 4 |       "Bash(chmod:*)",
 5 |       "Bash(./install-claude-hooks.sh:*)",
 6 |       "Bash(ls:*)",
 7 |       "mcp__vibe_kanban__list_projects"
 8 |     ],
 9 |     "deny": []
10 |   }
11 | }


--------------------------------------------------------------------------------
/.mcp.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "mcpServers": {
 3 |     "code-graph-mcp": {
 4 |       "type": "stdio",
 5 |       "command": "uv run code-graph-mcp --project-root /home/shawn/workspace/0-projects/code-graph-mcp --verbose",
 6 |       "args": [],
 7 |       "env": {}
 8 |     }
 9 |   }
10 | }


--------------------------------------------------------------------------------
/src/code_graph_mcp/__init__.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Code Graph MCP Server
 3 | 
 4 | Enterprise-ready Model Context Protocol server providing comprehensive
 5 | code analysis, navigation, and quality assessment capabilities.
 6 | """
 7 | 
 8 | from .server import cli as main
 9 | 
10 | 
11 | __version__ = "1.0.9"
12 | __all__ = ["main"]
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python-generated files
 2 | __pycache__/
 3 | *.py[oc]
 4 | build/
 5 | dist/
 6 | wheels/
 7 | *.egg-info
 8 | .hook/
 9 | 
10 | # Virtual environments
11 | .venv
12 | REFERENCE/
13 | 
14 | # Logs
15 | logs
16 | *.log
17 | npm-debug.log*
18 | yarn-debug.log*
19 | yarn-error.log*
20 | dev-debug.log
21 | # Dependency directories
22 | node_modules/
23 | # Environment variables
24 | .env
25 | # Editor directories and files
26 | .idea
27 | .vscode
28 | *.suo
29 | *.ntvs*
30 | *.njsproj
31 | *.sln
32 | *.sw?
33 | # OS specific
34 | .DS_Store
35 | 
36 | # Task files
37 | # tasks.json
38 | # tasks/ 
39 | 


--------------------------------------------------------------------------------
/tests/test_tool_schema.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Test what our tools actually look like when serialized to JSON
 4 | """
 5 | 
 6 | import json
 7 | import mcp.types as types
 8 | 
 9 | # Create one of our tools
10 | tool = types.Tool(
11 |     name="complexity_analysis",
12 |     description="Analyze code complexity and refactoring opportunities",
13 |     inputSchema={
14 |         "type": "object",
15 |         "properties": {
16 |             "threshold": {
17 |                 "type": "integer",
18 |                 "description": "Minimum complexity threshold to report",
19 |                 "default": 10,
20 |             }
21 |         },
22 |     },
23 | )
24 | 
25 | print("Our tool as dict:")
26 | print(json.dumps(tool.model_dump(), indent=2))
27 | 
28 | print("\nOur tool JSON schema:")
29 | print(json.dumps(tool.model_json_schema(), indent=2))
30 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Shawn McAllister @entrepeneur4lyf
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/tests/test_quick_tool.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Quick test of a single MCP tool"""
 3 | 
 4 | import asyncio
 5 | from mcp import ClientSession, StdioServerParameters
 6 | from mcp.client.stdio import stdio_client
 7 | 
 8 | async def test_single_tool():
 9 |     server_params = StdioServerParameters(
10 |         command="code-graph-mcp",
11 |         args=["--project-root", "."],
12 |     )
13 | 
14 |     async with stdio_client(server_params) as (read, write):
15 |         async with ClientSession(read, write) as session:
16 |             await session.initialize()
17 | 
18 |             # Test project_statistics tool
19 |             result = await session.call_tool("project_statistics", {})
20 | 
21 |             content = ""
22 |             if result.content:
23 |                 for item in result.content:
24 |                     if hasattr(item, 'text'):
25 |                         content += item.text
26 | 
27 |             print("🎯 project_statistics result:")
28 |             print(content[:500])
29 |             print(f"\n✅ SUCCESS: {len(content)} characters returned")
30 | 
31 | if __name__ == "__main__":
32 |     asyncio.run(test_single_tool())
33 | 


--------------------------------------------------------------------------------
/tests/test_mcp_connectivity.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Simple MCP Connectivity Test
 4 | Just tests that the server is reachable and tools are listed
 5 | """
 6 | 
 7 | import asyncio
 8 | from mcp import ClientSession, StdioServerParameters
 9 | from mcp.client.stdio import stdio_client
10 | 
11 | 
12 | async def test_basic_connectivity():
13 |     """Test basic MCP server connectivity"""
14 | 
15 |     print("🔗 Testing MCP Server Connectivity")
16 |     print("=" * 40)
17 | 
18 |     try:
19 |         server_params = StdioServerParameters(
20 |             command="code-graph-mcp",
21 |             args=["--project-root", ".", "--verbose"],
22 |         )
23 |         async with stdio_client(server_params) as (read, write):
24 |             async with ClientSession(read, write) as session:
25 |                 await session.initialize()
26 | 
27 |                 print("✅ Server connection established")
28 | 
29 |                 # List available tools
30 |                 print("\n📋 Listing Tools...")
31 |                 tools = await session.list_tools()
32 |                 print(f"✅ Found {len(tools.tools)} tools:")
33 | 
34 |                 for tool in tools.tools:
35 |                     print(f"  • {tool.name}: {tool.description}")
36 | 
37 |                 print(f"\n🎯 SUCCESS: MCP server is properly exposing {len(tools.tools)} tools")
38 |                 return True
39 | 
40 |     except Exception as e:
41 |         print(f"❌ Connection failed: {e}")
42 |         return False
43 | 
44 | 
45 | async def main():
46 |     """Run connectivity test"""
47 |     success = await test_basic_connectivity()
48 |     return 0 if success else 1
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     exit_code = asyncio.run(main())
53 |     exit(exit_code)
54 | 


--------------------------------------------------------------------------------
/tests/simple_test.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """Simple MCP server test"""
 3 | 
 4 | import asyncio
 5 | import subprocess
 6 | 
 7 | async def test_basic_functionality():
 8 |     """Test basic server functionality"""
 9 |     print("🚀 Testing Code Graph MCP Server")
10 | 
11 |     # Test 1: Can we start the server?
12 |     print("\n1. Testing server startup...")
13 |     try:
14 |         result = subprocess.run([
15 |             "code-graph-mcp", "--help"
16 |         ], capture_output=True, text=True, timeout=10)
17 | 
18 |         if result.returncode == 0:
19 |             print("✅ Server command works")
20 |             print(f"   Output: {result.stdout[:100]}...")
21 |         else:
22 |             print(f"❌ Server command failed: {result.stderr}")
23 |             return False
24 |     except Exception as e:
25 |         print(f"❌ Exception: {e}")
26 |         return False
27 | 
28 |     # Test 2: Can we start with project root?
29 |     print("\n2. Testing server with project root...")
30 |     try:
31 |         proc = subprocess.Popen([
32 |             "code-graph-mcp", "--project-root", ".", "--verbose"
33 |         ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
34 | 
35 |         # Let it run for 2 seconds then kill
36 |         await asyncio.sleep(2)
37 |         proc.terminate()
38 |         stdout, stderr = proc.communicate()
39 | 
40 |         if "Initializing server" in stderr or "code-graph-intelligence" in stderr:
41 |             print("✅ Server initializes correctly")
42 |             print("   Debug output contains expected server initialization")
43 |         else:
44 |             print(f"❌ Unexpected output: {stderr[:200]}")
45 |             return False
46 | 
47 |     except Exception as e:
48 |         print(f"❌ Exception: {e}")
49 |         return False
50 | 
51 |     print("\n🎯 Basic functionality test: PASSED")
52 |     return True
53 | 
54 | if __name__ == "__main__":
55 |     asyncio.run(test_basic_functionality())
56 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "code-graph-mcp"
 3 | version = "1.2.3"
 4 | description = "MCP server for multi-language code graph intelligence and analysis across 25+ programming languages"
 5 | readme = "README.md"
 6 | authors = [
 7 |     { name = "entrepeneur4lyf", email = "shawn.payments@gmail.com" }
 8 | ]
 9 | license = { file = "LICENSE" }
10 | requires-python = ">=3.12"
11 | dependencies = [
12 |     "mcp>=1.12.2",
13 |     "ast-grep-py>=0.39.0",
14 |     "anyio>=4.0.0",
15 |     "click>=8.0.0",
16 |     "rustworkx>=0.15.0",
17 |     "watchdog>=6.0.0",
18 | ]
19 | keywords = [
20 |     "mcp", "code-analysis", "ast", "claude-code", "graph", "intelligence", 
21 |     "python", "code-quality", "complexity-analysis", "static-analysis"
22 | ]
23 | classifiers = [
24 |     "Development Status :: 5 - Production/Stable",
25 |     "Intended Audience :: Developers",
26 |     "License :: OSI Approved :: MIT License",
27 |     "Programming Language :: Python :: 3",
28 |     "Programming Language :: Python :: 3.12",
29 |     "Topic :: Software Development :: Code Generators",
30 |     "Topic :: Software Development :: Libraries :: Python Modules",
31 |     "Topic :: Software Development :: Compilers",
32 |     "Topic :: Software Development :: Quality Assurance",
33 |     "Topic :: Text Processing :: Linguistic",
34 | ]
35 | 
36 | [project.optional-dependencies]
37 | dev = [
38 |     "pytest>=7.0.0",
39 |     "pytest-asyncio>=0.23.0",
40 |     "black>=23.0.0",
41 |     "ruff>=0.1.0",
42 | ]
43 | test = [
44 |     "pytest>=7.0.0",
45 |     "pytest-asyncio>=0.23.0",
46 | ]
47 | 
48 | [project.urls]
49 | Homepage = "https://github.com/entrepeneur4lyf/code-graph-mcp"
50 | Repository = "https://github.com/entrepeneur4lyf/code-graph-mcp"
51 | Documentation = "https://github.com/entrepeneur4lyf/code-graph-mcp#readme"
52 | Changelog = "https://github.com/entrepeneur4lyf/code-graph-mcp/releases"
53 | Issues = "https://github.com/entrepeneur4lyf/code-graph-mcp/issues"
54 | 
55 | [project.scripts]
56 | code-graph-mcp = "code_graph_mcp:main"
57 | 
58 | [build-system]
59 | requires = ["hatchling"]
60 | build-backend = "hatchling.build"
61 | 
62 | [tool.hatch.build.targets.wheel]
63 | packages = ["src/code_graph_mcp"]
64 | 
65 | [tool.hatch.build.targets.sdist]
66 | include = [
67 |     "/src",
68 |     "/README.md", 
69 |     "/pyproject.toml",
70 | ]
71 | 
72 | [tool.pytest.ini_options]
73 | testpaths = ["tests"]
74 | python_files = ["test_*.py"]
75 | python_classes = ["Test*"]
76 | python_functions = ["test_*"]
77 | addopts = "-v --tb=short"
78 | 
79 | [tool.ruff]
80 | target-version = "py312"
81 | line-length = 100
82 | 
83 | [tool.ruff.lint]
84 | select = ["E", "F", "W", "C90"]
85 | ignore = ["E501"]  # Line too long (handled by formatter)
86 | 
87 | [tool.black]
88 | target-version = ["py312"]
89 | line-length = 100
90 | 
91 | [dependency-groups]
92 | dev = [
93 |     "pytest>=8.4.1",
94 |     "pytest-asyncio>=1.1.0",
95 |     "twine>=6.1.0",
96 | ]
97 | 


--------------------------------------------------------------------------------
/tests/test_ast_grep.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | Test script to explore ast-grep capabilities and supported languages
 4 | """
 5 | 
 6 | import ast_grep_py as ag
 7 | 
 8 | # Test basic functionality
 9 | def test_supported_languages():
10 |     """Explore what languages ast-grep supports"""
11 |     print("=== AST-GREP LANGUAGE EXPLORATION ===")
12 | 
13 |     # Test with different language files to see what works
14 |     test_cases = [
15 |         ("Python", "def hello():\n    print('world')", "python"),
16 |         ("JavaScript", "function hello() {\n    console.log('world');\n}", "javascript"),
17 |         ("TypeScript", "function hello(): void {\n    console.log('world');\n}", "typescript"),
18 |         ("Java", "public class Hello {\n    public static void main(String[] args) {\n        System.out.println(\"world\");\n    }\n}", "java"),
19 |         ("C", "#include <stdio.h>\nint main() {\n    printf(\"world\");\n    return 0;\n}", "c"),
20 |         ("Rust", "fn main() {\n    println!(\"world\");\n}", "rust"),
21 |         ("Go", "package main\nimport \"fmt\"\nfunc main() {\n    fmt.Println(\"world\")\n}", "go"),
22 |     ]
23 | 
24 |     supported_languages = []
25 | 
26 |     for lang_name, code, lang_id in test_cases:
27 |         try:
28 |             print(f"\n--- Testing {lang_name} ---")
29 |             root = ag.SgRoot(code, lang_id)
30 |             root_node = root.root()
31 |             print(f"✅ {lang_name}: Successfully parsed (root kind: {root_node.kind()})")
32 |             supported_languages.append(lang_name)
33 | 
34 |             # Try to find patterns based on language
35 |             if lang_name == "Python":
36 |                 nodes = root_node.find_all({"rule": {"kind": "function_definition"}})
37 |             elif lang_name in ["JavaScript", "TypeScript"]:
38 |                 nodes = root_node.find_all({"rule": {"kind": "function_declaration"}})
39 |             elif lang_name == "Java":
40 |                 nodes = root_node.find_all({"rule": {"kind": "method_declaration"}})
41 |             else:
42 |                 nodes = []
43 | 
44 |             print(f"   Functions found: {len(nodes) if nodes else 0}")
45 | 
46 |         except Exception as e:
47 |             print(f"❌ {lang_name}: Error - {e}")
48 | 
49 |     print("\n=== SUMMARY ===")
50 |     print(f"Supported languages ({len(supported_languages)}): {', '.join(supported_languages)}")
51 |     return supported_languages
52 | 
53 | def test_node_capabilities():
54 |     """Test ast-grep node traversal and querying capabilities"""
55 |     print("\n=== AST-GREP NODE CAPABILITIES ===")
56 | 
57 |     # Python example
58 |     python_code = """
59 | def calculate_complexity(node):
60 |     '''Calculate cyclomatic complexity'''
61 |     complexity = 1
62 |     for child in node.children():
63 |         if child.kind() in ['if_statement', 'while_statement']:
64 |             complexity += 1
65 |     return complexity
66 | 
67 | class CodeAnalyzer:
68 |     def __init__(self, root_path):
69 |         self.root_path = root_path
70 |         self.cache = {}
71 | """
72 | 
73 |     root = ag.SgRoot(python_code, "python")
74 |     root_node = root.root()
75 | 
76 |     print("Root node:", root_node.kind())
77 |     print("Child count:", len(root_node.children()))
78 | 
79 |     # Find function definitions
80 |     functions = root_node.find_all({"rule": {"kind": "function_definition"}})
81 |     print(f"Functions found: {len(functions)}")
82 | 
83 |     for func in functions:
84 |         print(f"  - Function: {func.text()[:50]}...")
85 |         print(f"    Kind: {func.kind()}")
86 | 
87 |     # Find class definitions
88 |     classes = root_node.find_all({"rule": {"kind": "class_definition"}})
89 |     print(f"Classes found: {len(classes)}")
90 | 
91 |     for cls in classes:
92 |         print(f"  - Class: {cls.text()[:50]}...")
93 | 
94 | if __name__ == "__main__":
95 |     supported_langs = test_supported_languages()
96 |     test_node_capabilities()
97 | 


--------------------------------------------------------------------------------
/tests/mcp_tools_test.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Manual MCP Tools Test - Direct server communication
  4 | Tests all 8 MCP tools and generates a comprehensive report
  5 | """
  6 | 
  7 | import asyncio
  8 | import json
  9 | from pathlib import Path
 10 | from datetime import datetime
 11 | 
 12 | from mcp import ClientSession, StdioServerParameters
 13 | from mcp.client.stdio import stdio_client
 14 | 
 15 | 
 16 | async def test_all_mcp_tools():
 17 |     """Test all MCP tools and generate report"""
 18 | 
 19 |     results = {
 20 |         "test_timestamp": datetime.now().isoformat(),
 21 |         "project_path": str(Path.cwd()),
 22 |         "tool_results": {}
 23 |     }
 24 | 
 25 |     print("🧪 Testing Code Graph MCP Server Tools")
 26 |     print("=" * 60)
 27 | 
 28 |     try:
 29 |         server_params = StdioServerParameters(
 30 |             command="code-graph-mcp",
 31 |             args=["--project-root", "."],
 32 |         )
 33 |         async with stdio_client(server_params) as (read, write):
 34 |             async with ClientSession(read, write) as session:
 35 |                 await session.initialize()
 36 | 
 37 |                 # List available tools first
 38 |                 print("\n📋 Available Tools:")
 39 |                 tools = await session.list_tools()
 40 |                 for tool in tools.tools:
 41 |                     print(f"  • {tool.name}: {tool.description}")
 42 | 
 43 |                 # Test each tool
 44 |                 test_cases = [
 45 |                     ("analyze_codebase", {}),
 46 |                     ("project_statistics", {}),
 47 |                     ("dependency_analysis", {}),
 48 |                     ("complexity_analysis", {"threshold": 10}),
 49 |                     ("find_definition", {"symbol": "main"}),
 50 |                     ("find_references", {"symbol": "main"}),
 51 |                     ("find_callers", {"function": "main"}),
 52 |                     ("find_callees", {"function": "main"}),
 53 |                 ]
 54 | 
 55 |                 for tool_name, args in test_cases:
 56 |                     print(f"\n🔧 Testing {tool_name}...")
 57 |                     try:
 58 |                         result = await session.call_tool(tool_name, args)
 59 | 
 60 |                         # Extract content
 61 |                         content = ""
 62 |                         if result.content:
 63 |                             for item in result.content:
 64 |                                 if hasattr(item, 'text'):
 65 |                                     content += item.text
 66 | 
 67 |                         success = bool(content.strip())
 68 |                         results["tool_results"][tool_name] = {
 69 |                             "status": "SUCCESS" if success else "EMPTY",
 70 |                             "content_length": len(content),
 71 |                             "preview": content[:200] + "..." if len(content) > 200 else content,
 72 |                             "arguments": args
 73 |                         }
 74 | 
 75 |                         status = "✅" if success else "⚠️ "
 76 |                         print(f"  {status} {tool_name}: {len(content)} chars returned")
 77 | 
 78 |                     except Exception as e:
 79 |                         results["tool_results"][tool_name] = {
 80 |                             "status": "ERROR",
 81 |                             "error": str(e),
 82 |                             "arguments": args
 83 |                         }
 84 |                         print(f"  ❌ {tool_name}: {e}")
 85 | 
 86 |     except Exception as e:
 87 |         print(f"❌ Server connection failed: {e}")
 88 |         results["server_error"] = str(e)
 89 | 
 90 |     # Generate summary
 91 |     successful_tools = sum(1 for r in results["tool_results"].values() if r["status"] == "SUCCESS")
 92 |     total_tools = len(results["tool_results"])
 93 | 
 94 |     if total_tools > 0:
 95 |         print(f"\n📊 SUMMARY: {successful_tools}/{total_tools} tools working ({successful_tools/total_tools*100:.1f}%)")
 96 |     else:
 97 |         print("\n📊 SUMMARY: No tools tested")
 98 | 
 99 |     return results
100 | 
101 | 
102 | async def main():
103 |     """Run tests and save report"""
104 |     results = await test_all_mcp_tools()
105 | 
106 |     # Save detailed results
107 |     with open("mcp_test_results.json", "w") as f:
108 |         json.dump(results, f, indent=2)
109 | 
110 |     print("\n💾 Detailed results saved to mcp_test_results.json")
111 |     return results
112 | 
113 | 
114 | if __name__ == "__main__":
115 |     asyncio.run(main())
116 | 


--------------------------------------------------------------------------------
/tests/test_mcp_tools.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """Test script for code-graph-mcp MCP tools."""
  3 | 
  4 | import asyncio
  5 | import logging
  6 | from mcp import ClientSession, StdioServerParameters
  7 | from mcp.client.stdio import stdio_client
  8 | 
  9 | # Configure logging
 10 | logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 11 | logger = logging.getLogger(__name__)
 12 | 
 13 | async def test_mcp_tool(session, tool_name, arguments=None):
 14 |     """Test a specific MCP tool and return results."""
 15 |     if arguments is None:
 16 |         arguments = {}
 17 | 
 18 |     try:
 19 |         logger.info(f"Testing tool: {tool_name} with args: {arguments}")
 20 |         result = await session.call_tool(tool_name, arguments)
 21 | 
 22 |         return {
 23 |             'tool': tool_name,
 24 |             'success': True,
 25 |             'content': result.content,
 26 |             'error': None
 27 |         }
 28 |     except Exception as e:
 29 |         logger.error(f"Error testing {tool_name}: {str(e)}")
 30 |         return {
 31 |             'tool': tool_name,
 32 |             'success': False,
 33 |             'content': None,
 34 |             'error': str(e)
 35 |         }
 36 | 
 37 | async def run_all_tests():
 38 |     """Run all MCP tool tests."""
 39 |     project_root = '/home/shawn/workspace/0-projects/code-graph-mcp'
 40 | 
 41 |     server_params = StdioServerParameters(
 42 |         command='uv',
 43 |         args=['run', 'code-graph-mcp', '--project-root', project_root, '--verbose']
 44 |     )
 45 | 
 46 |     # Define test cases for each tool
 47 |     test_cases = [
 48 |         ('analyze_codebase', {}),
 49 |         ('find_definition', {'symbol': 'main'}),
 50 |         ('find_references', {'symbol': 'main'}),
 51 |         ('find_callers', {'function': 'main'}),
 52 |         ('find_callees', {'function': 'main'}),
 53 |         ('complexity_analysis', {'threshold': 10}),
 54 |         ('dependency_analysis', {}),
 55 |         ('project_statistics', {})
 56 |     ]
 57 | 
 58 |     results = []
 59 | 
 60 |     try:
 61 |         async with stdio_client(server_params) as (read, write):
 62 |             async with ClientSession(read, write) as session:
 63 |                 await session.initialize()
 64 |                 logger.info("MCP session initialized successfully")
 65 | 
 66 |                 # List available tools first
 67 |                 try:
 68 |                     tools = await session.list_tools()
 69 |                     logger.info(f"Available tools: {[tool.name for tool in tools.tools]}")
 70 |                 except Exception as e:
 71 |                     logger.error(f"Failed to list tools: {e}")
 72 |                     return []
 73 | 
 74 |                 # Test each tool
 75 |                 for tool_name, args in test_cases:
 76 |                     result = await test_mcp_tool(session, tool_name, args)
 77 |                     results.append(result)
 78 | 
 79 |                     # Add a small delay between tests
 80 |                     await asyncio.sleep(0.5)
 81 | 
 82 |     except Exception as e:
 83 |         logger.error(f"Failed to create MCP session: {e}")
 84 |         return []
 85 | 
 86 |     return results
 87 | 
 88 | def print_results(results):
 89 |     """Print formatted test results."""
 90 |     print("\n" + "="*80)
 91 |     print("MCP TOOL TEST RESULTS")
 92 |     print("="*80)
 93 | 
 94 |     for i, result in enumerate(results, 1):
 95 |         print(f"\n{i}. Tool: {result['tool']}")
 96 |         print(f"   Status: {'✅ SUCCESS' if result['success'] else '❌ FAILED'}")
 97 | 
 98 |         if result['success'] and result['content']:
 99 |             # Try to format content nicely
100 |             content = result['content']
101 |             if isinstance(content, list) and len(content) > 0:
102 |                 first_item = content[0]
103 |                 if hasattr(first_item, 'text'):
104 |                     text_content = first_item.text
105 |                     # Truncate very long content
106 |                     if len(text_content) > 500:
107 |                         print(f"   Content: {text_content[:500]}... [truncated]")
108 |                     else:
109 |                         print(f"   Content: {text_content}")
110 |                 else:
111 |                     print(f"   Content: {str(content)[:300]}...")
112 |             else:
113 |                 print(f"   Content: {str(content)[:300]}...")
114 |         elif not result['success']:
115 |             print(f"   Error: {result['error']}")
116 |         else:
117 |             print("   Content: No content returned")
118 | 
119 |     print("\n" + "="*80)
120 | 
121 |     # Summary
122 |     successful = sum(1 for r in results if r['success'])
123 |     total = len(results)
124 |     print(f"SUMMARY: {successful}/{total} tools tested successfully")
125 |     print("="*80)
126 | 
127 | if __name__ == "__main__":
128 |     results = asyncio.run(run_all_tests())
129 |     print_results(results)
130 | 


--------------------------------------------------------------------------------
/tests/test_mcp_server.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Comprehensive test for the Code Graph MCP Server
  4 | Tests all 8 MCP tools and server functionality
  5 | """
  6 | 
  7 | import asyncio
  8 | import subprocess
  9 | import sys
 10 | from typing import Any, Dict
 11 | 
 12 | from mcp.client.session import ClientSession
 13 | from mcp.client.stdio import stdio_client
 14 | 
 15 | 
 16 | class MCPServerTest:
 17 |     """Test suite for the Code Graph MCP Server"""
 18 | 
 19 |     def __init__(self):
 20 |         self.results = []
 21 |         self.server_process = None
 22 | 
 23 |     async def run_all_tests(self):
 24 |         """Run all MCP server tests"""
 25 |         print("🚀 Starting Code Graph MCP Server Tests")
 26 |         print("=" * 50)
 27 | 
 28 |         # Test 1: Server startup
 29 |         await self.test_server_startup()
 30 | 
 31 |         # Test 2: Tool listing
 32 |         await self.test_tool_listing()
 33 | 
 34 |         # Test 3: Individual tool tests
 35 |         await self.test_analyze_codebase()
 36 |         await self.test_find_definition()
 37 |         await self.test_find_references()
 38 |         await self.test_find_callers()
 39 |         await self.test_find_callees()
 40 |         await self.test_complexity_analysis()
 41 |         await self.test_dependency_analysis()
 42 |         await self.test_project_statistics()
 43 | 
 44 |         # Summary
 45 |         self.print_summary()
 46 | 
 47 |     async def test_server_startup(self):
 48 |         """Test if the server starts correctly"""
 49 |         print("\n📋 Test 1: Server Startup")
 50 |         try:
 51 |             # Test direct command
 52 |             result = subprocess.run([
 53 |                 "code-graph-mcp", "--project-root", ".", "--help"
 54 |             ], capture_output=True, text=True, timeout=10)
 55 | 
 56 |             if result.returncode == 0 and "Code Graph Intelligence MCP Server" in result.stdout:
 57 |                 self.log_success("Server startup", "Server starts and shows help")
 58 |             else:
 59 |                 self.log_failure("Server startup", f"Command failed: {result.stderr}")
 60 | 
 61 |         except Exception as e:
 62 |             self.log_failure("Server startup", f"Exception: {e}")
 63 | 
 64 |     async def test_tool_listing(self):
 65 |         """Test MCP tool listing via stdio client"""
 66 |         print("\n📋 Test 2: Tool Listing")
 67 |         try:
 68 |             command = ["code-graph-mcp", "--project-root", "."]
 69 |             async with stdio_client(command) as streams:
 70 |                 async with ClientSession(streams[0], streams[1]) as session:
 71 |                     tools = await session.list_tools()
 72 | 
 73 |                     expected_tools = {
 74 |                         "analyze_codebase", "find_definition", "find_references",
 75 |                         "find_callers", "find_callees", "complexity_analysis",
 76 |                         "dependency_analysis", "project_statistics"
 77 |                     }
 78 | 
 79 |                     actual_tools = {tool.name for tool in tools.tools}
 80 | 
 81 |                     if expected_tools.issubset(actual_tools):
 82 |                         self.log_success("Tool listing", f"All 8 tools available: {actual_tools}")
 83 |                     else:
 84 |                         missing = expected_tools - actual_tools
 85 |                         self.log_failure("Tool listing", f"Missing tools: {missing}")
 86 | 
 87 |         except Exception as e:
 88 |             self.log_failure("Tool listing", f"Exception: {e}")
 89 | 
 90 |     async def test_analyze_codebase(self):
 91 |         """Test analyze_codebase tool"""
 92 |         print("\n📋 Test 3: Analyze Codebase")
 93 |         await self.test_tool("analyze_codebase", {})
 94 | 
 95 |     async def test_find_definition(self):
 96 |         """Test find_definition tool"""
 97 |         print("\n📋 Test 4: Find Definition")
 98 |         await self.test_tool("find_definition", {"symbol": "main"})
 99 | 
100 |     async def test_find_references(self):
101 |         """Test find_references tool"""
102 |         print("\n📋 Test 5: Find References")
103 |         await self.test_tool("find_references", {"symbol": "main"})
104 | 
105 |     async def test_find_callers(self):
106 |         """Test find_callers tool"""
107 |         print("\n📋 Test 6: Find Callers")
108 |         await self.test_tool("find_callers", {"function": "main"})
109 | 
110 |     async def test_find_callees(self):
111 |         """Test find_callees tool"""
112 |         print("\n📋 Test 7: Find Callees")
113 |         await self.test_tool("find_callees", {"function": "main"})
114 | 
115 |     async def test_complexity_analysis(self):
116 |         """Test complexity_analysis tool"""
117 |         print("\n📋 Test 8: Complexity Analysis")
118 |         await self.test_tool("complexity_analysis", {"threshold": 10})
119 | 
120 |     async def test_dependency_analysis(self):
121 |         """Test dependency_analysis tool"""
122 |         print("\n📋 Test 9: Dependency Analysis")
123 |         await self.test_tool("dependency_analysis", {})
124 | 
125 |     async def test_project_statistics(self):
126 |         """Test project_statistics tool"""
127 |         print("\n📋 Test 10: Project Statistics")
128 |         await self.test_tool("project_statistics", {})
129 | 
130 |     async def test_tool(self, tool_name: str, arguments: Dict[str, Any]):
131 |         """Generic tool test"""
132 |         try:
133 |             command = ["code-graph-mcp", "--project-root", "."]
134 |             async with stdio_client(command) as streams:
135 |                 async with ClientSession(streams[0], streams[1]) as session:
136 |                     result = await session.call_tool(tool_name, arguments)
137 | 
138 |                     if result.content and len(result.content) > 0:
139 |                         # Check if result contains meaningful content
140 |                         content_text = ""
141 |                         for content in result.content:
142 |                             if hasattr(content, 'text'):
143 |                                 content_text += content.text
144 | 
145 |                         if content_text.strip():
146 |                             self.log_success(tool_name, f"Returned content ({len(content_text)} chars)")
147 |                         else:
148 |                             self.log_failure(tool_name, "Empty content returned")
149 |                     else:
150 |                         self.log_failure(tool_name, "No content returned")
151 | 
152 |         except Exception as e:
153 |             self.log_failure(tool_name, f"Exception: {e}")
154 | 
155 |     def log_success(self, test_name: str, message: str):
156 |         """Log successful test"""
157 |         self.results.append({"test": test_name, "status": "PASS", "message": message})
158 |         print(f"✅ {test_name}: {message}")
159 | 
160 |     def log_failure(self, test_name: str, message: str):
161 |         """Log failed test"""
162 |         self.results.append({"test": test_name, "status": "FAIL", "message": message})
163 |         print(f"❌ {test_name}: {message}")
164 | 
165 |     def print_summary(self):
166 |         """Print test summary"""
167 |         print("\n" + "=" * 50)
168 |         print("📊 TEST SUMMARY")
169 |         print("=" * 50)
170 | 
171 |         passed = sum(1 for r in self.results if r["status"] == "PASS")
172 |         failed = sum(1 for r in self.results if r["status"] == "FAIL")
173 |         total = len(self.results)
174 | 
175 |         print(f"Total Tests: {total}")
176 |         print(f"Passed: {passed} ✅")
177 |         print(f"Failed: {failed} ❌")
178 |         print(f"Success Rate: {(passed/total*100):.1f}%")
179 | 
180 |         if failed > 0:
181 |             print("\n🔍 FAILED TESTS:")
182 |             for result in self.results:
183 |                 if result["status"] == "FAIL":
184 |                     print(f"  ❌ {result['test']}: {result['message']}")
185 | 
186 |         print("\n🎯 OVERALL RESULT:", "PASS" if failed == 0 else "FAIL")
187 | 
188 |         return failed == 0
189 | 
190 | 
191 | async def main():
192 |     """Main test runner"""
193 |     test_suite = MCPServerTest()
194 |     success = await test_suite.run_all_tests()
195 |     sys.exit(0 if success else 1)
196 | 
197 | 
198 | if __name__ == "__main__":
199 |     asyncio.run(main())
200 | 


--------------------------------------------------------------------------------
/src/code_graph_mcp/file_watcher.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Debounced File Watcher for Code Graph MCP
  4 | 
  5 | Provides intelligent file system monitoring with debouncing to automatically
  6 | trigger graph updates when source files change.
  7 | """
  8 | 
  9 | import asyncio
 10 | import logging
 11 | import time
 12 | from pathlib import Path
 13 | from typing import Awaitable, Callable, Optional, Set, Union
 14 | 
 15 | from watchdog.events import FileSystemEvent, FileSystemEventHandler
 16 | from watchdog.observers import Observer
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | 
 21 | class DebouncedFileWatcher:
 22 |     """
 23 |     A debounced file system watcher that monitors source code files and triggers
 24 |     callbacks when changes are detected, with intelligent debouncing to prevent
 25 |     excessive re-analysis during bulk operations.
 26 |     """
 27 | 
 28 |     def __init__(
 29 |         self,
 30 |         project_root: Path,
 31 |         callback: Union[Callable[[], None], Callable[[], Awaitable[None]]],
 32 |         debounce_delay: float = 2.0,
 33 |         should_ignore_path: Optional[Callable[[Path, Path], bool]] = None,
 34 |         supported_extensions: Optional[Set[str]] = None,
 35 |     ):
 36 |         """
 37 |         Initialize the debounced file watcher.
 38 | 
 39 |         Args:
 40 |             project_root: Root directory to watch
 41 |             callback: Function to call when files change (sync or async)
 42 |             debounce_delay: Delay in seconds before triggering callback
 43 |             should_ignore_path: Function to check if a path should be ignored
 44 |             supported_extensions: Set of file extensions to watch
 45 |         """
 46 |         self.project_root = project_root
 47 |         self.callback = callback
 48 |         self.debounce_delay = debounce_delay
 49 |         self.should_ignore_path = should_ignore_path
 50 |         self.supported_extensions = supported_extensions or set()
 51 | 
 52 |         self._observer: Optional[Observer] = None
 53 |         self._debounce_task: Optional[asyncio.Task] = None
 54 |         self._last_change_time = 0
 55 |         self._is_running = False
 56 |         self._loop: Optional[asyncio.AbstractEventLoop] = None
 57 | 
 58 |         # Track recent changes to avoid duplicate processing
 59 |         self._recent_changes: Set[str] = set()
 60 |         self._change_cleanup_timer: Optional[float] = None
 61 | 
 62 |     class _EventHandler(FileSystemEventHandler):
 63 |         """Internal event handler for file system events."""
 64 | 
 65 |         def __init__(self, watcher: "DebouncedFileWatcher"):
 66 |             self.watcher = watcher
 67 |             super().__init__()
 68 | 
 69 |         def on_modified(self, event: FileSystemEvent) -> None:
 70 |             if not event.is_directory:
 71 |                 self.watcher._handle_file_change(Path(event.src_path))
 72 | 
 73 |         def on_created(self, event: FileSystemEvent) -> None:
 74 |             if not event.is_directory:
 75 |                 self.watcher._handle_file_change(Path(event.src_path))
 76 | 
 77 |         def on_deleted(self, event: FileSystemEvent) -> None:
 78 |             if not event.is_directory:
 79 |                 self.watcher._handle_file_change(Path(event.src_path))
 80 | 
 81 |         def on_moved(self, event: FileSystemEvent) -> None:
 82 |             if not event.is_directory:
 83 |                 # Handle both source and destination for moves
 84 |                 self.watcher._handle_file_change(Path(event.src_path))
 85 |                 if hasattr(event, 'dest_path'):
 86 |                     self.watcher._handle_file_change(Path(event.dest_path))
 87 | 
 88 |     def _should_watch_file(self, file_path: Path) -> bool:
 89 |         """Check if a file should be watched based on extension and ignore rules."""
 90 |         try:
 91 |             # Check if path should be ignored (e.g., .gitignore rules)
 92 |             if self.should_ignore_path and self.should_ignore_path(file_path, self.project_root):
 93 |                 return False
 94 | 
 95 |             # Check file extension
 96 |             if self.supported_extensions and file_path.suffix.lower() not in self.supported_extensions:
 97 |                 return False
 98 | 
 99 |             # Skip temporary files and common non-source files
100 |             if file_path.name.startswith('.') or file_path.name.endswith('~'):
101 |                 return False
102 | 
103 |             # Skip common temporary file patterns
104 |             temp_patterns = {'.tmp', '.temp', '.swp', '.swo', '.bak', '.orig'}
105 |             if any(file_path.name.endswith(pattern) for pattern in temp_patterns):
106 |                 return False
107 | 
108 |             return True
109 | 
110 |         except Exception as e:
111 |             logger.debug(f"Error checking if file should be watched: {file_path}: {e}")
112 |             return False
113 | 
114 |     def _handle_file_change(self, file_path: Path) -> None:
115 |         """Handle a file system change event."""
116 |         if not self._should_watch_file(file_path):
117 |             return
118 | 
119 |         # Convert to string for set operations
120 |         file_str = str(file_path)
121 | 
122 |         # Clean up old changes first
123 |         self._cleanup_recent_changes_if_needed()
124 | 
125 |         # Skip if we've recently processed this file
126 |         if file_str in self._recent_changes:
127 |             return
128 | 
129 |         # Add to recent changes and schedule cleanup
130 |         self._recent_changes.add(file_str)
131 |         self._schedule_change_cleanup()
132 | 
133 |         logger.debug(f"File change detected: {file_path}")
134 |         self._last_change_time = time.time()
135 | 
136 |         # Cancel existing debounce task and start a new one
137 |         if self._loop and self._loop.is_running():
138 |             if self._debounce_task and not self._debounce_task.done():
139 |                 self._debounce_task.cancel()
140 | 
141 |             # Schedule the debounced callback in the main event loop
142 |             self._loop.call_soon_threadsafe(self._create_debounce_task)
143 | 
144 |     def _create_debounce_task(self) -> None:
145 |         """Create the debounce task in the main event loop."""
146 |         self._debounce_task = asyncio.create_task(self._debounced_callback())
147 | 
148 |     def _schedule_change_cleanup(self) -> None:
149 |         """Schedule cleanup of recent changes tracking."""
150 |         # Use a simple timer instead of async task
151 |         self._change_cleanup_timer = time.time() + 10.0  # Clear after 10 seconds
152 | 
153 |     def _cleanup_recent_changes_if_needed(self) -> None:
154 |         """Clean up recent changes if enough time has passed."""
155 |         if (self._change_cleanup_timer and
156 |             time.time() > self._change_cleanup_timer):
157 |             # Log cleanup for monitoring
158 |             changes_count = len(self._recent_changes)
159 |             self._recent_changes.clear()
160 |             self._change_cleanup_timer = None
161 |             if changes_count > 0:
162 |                 logger.debug(f"File watcher cleanup: cleared {changes_count} recent changes")
163 | 
164 |     async def _debounced_callback(self) -> None:
165 |         """Execute the callback after the debounce delay."""
166 |         try:
167 |             await asyncio.sleep(self.debounce_delay)
168 | 
169 |             # Double-check that enough time has passed since the last change
170 |             time_since_change = time.time() - self._last_change_time
171 |             if time_since_change < self.debounce_delay:
172 |                 # More changes occurred, wait a bit more
173 |                 remaining_delay = self.debounce_delay - time_since_change
174 |                 await asyncio.sleep(remaining_delay)
175 | 
176 |             logger.info(f"Triggering callback after {self.debounce_delay}s debounce delay")
177 | 
178 |             # Handle both sync and async callbacks
179 |             result = self.callback()
180 |             if asyncio.iscoroutine(result):
181 |                 await result
182 | 
183 |         except asyncio.CancelledError:
184 |             logger.debug("Debounced callback cancelled")
185 |             raise  # Re-raise to properly handle cancellation
186 |         except Exception as e:
187 |             logger.error(f"Error in debounced callback: {e}")
188 |             # Don't re-raise to prevent crashing the file watcher
189 | 
190 |     async def start(self) -> None:
191 |         """Start watching for file changes."""
192 |         if self._is_running:
193 |             logger.warning("File watcher is already running")
194 |             return
195 | 
196 |         try:
197 |             # Store the current event loop
198 |             self._loop = asyncio.get_running_loop()
199 | 
200 |             self._observer = Observer()
201 |             event_handler = self._EventHandler(self)
202 | 
203 |             # Watch the project root recursively
204 |             self._observer.schedule(
205 |                 event_handler,
206 |                 str(self.project_root),
207 |                 recursive=True
208 |             )
209 | 
210 |             self._observer.start()
211 |             self._is_running = True
212 | 
213 |             logger.info(f"Started file watcher for: {self.project_root}")
214 | 
215 |         except Exception as e:
216 |             logger.error(f"Failed to start file watcher: {e}")
217 |             await self.stop()
218 |             raise
219 | 
220 |     async def stop(self) -> None:
221 |         """Stop watching for file changes."""
222 |         if not self._is_running:
223 |             return
224 | 
225 |         logger.info("Stopping file watcher...")
226 | 
227 |         # Cancel debounce task
228 |         if self._debounce_task and not self._debounce_task.done():
229 |             self._debounce_task.cancel()
230 |             try:
231 |                 await self._debounce_task
232 |             except asyncio.CancelledError:
233 |                 pass
234 | 
235 |         # Clear cleanup timer
236 |         self._change_cleanup_timer = None
237 | 
238 |         # Stop observer
239 |         if self._observer:
240 |             self._observer.stop()
241 |             self._observer.join(timeout=5.0)  # Wait up to 5 seconds
242 |             self._observer = None
243 | 
244 |         self._is_running = False
245 |         self._recent_changes.clear()
246 |         self._loop = None
247 |         logger.info("File watcher stopped")
248 | 
249 |     @property
250 |     def is_running(self) -> bool:
251 |         """Check if the file watcher is currently running."""
252 |         return self._is_running
253 | 
254 |     def get_stats(self) -> dict:
255 |         """Get statistics about the file watcher."""
256 |         return {
257 |             "is_running": self._is_running,
258 |             "project_root": str(self.project_root),
259 |             "debounce_delay": self.debounce_delay,
260 |             "recent_changes_count": len(self._recent_changes),
261 |             "last_change_time": self._last_change_time,
262 |             "has_pending_callback": self._debounce_task is not None and not self._debounce_task.done(),
263 |         }
264 | 


--------------------------------------------------------------------------------
/src/code_graph_mcp/universal_graph.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Universal Graph Data Structures
  3 | 
  4 | Language-agnostic data structures for representing code across multiple programming languages.
  5 | Provides a unified interface for AST nodes, relationships, and metadata.
  6 | """
  7 | 
  8 | from dataclasses import dataclass, field
  9 | from enum import Enum
 10 | from typing import Any, Dict, List, Optional, Set
 11 | 
 12 | # Standardized cache sizes for consistent memory usage
 13 | class CacheConfig:
 14 |     """Centralized cache size configuration for consistent memory management."""
 15 |     SMALL_CACHE = 1000      # For infrequent operations
 16 |     MEDIUM_CACHE = 10000    # For moderate frequency operations
 17 |     LARGE_CACHE = 50000     # For high frequency operations
 18 |     XLARGE_CACHE = 100000   # For very high frequency operations
 19 | 
 20 | 
 21 | class NodeType(Enum):
 22 |     """Universal node types that work across all programming languages."""
 23 | 
 24 |     MODULE = "module"
 25 |     CLASS = "class"
 26 |     FUNCTION = "function"
 27 |     VARIABLE = "variable"
 28 |     PARAMETER = "parameter"
 29 |     CONDITIONAL = "conditional"
 30 |     LOOP = "loop"
 31 |     EXCEPTION = "exception"
 32 |     INTERFACE = "interface"
 33 |     ENUM = "enum"
 34 |     NAMESPACE = "namespace"
 35 |     IMPORT = "import"
 36 |     LITERAL = "literal"
 37 |     CALL = "call"
 38 |     REFERENCE = "reference"
 39 | 
 40 | 
 41 | class RelationshipType(Enum):
 42 |     """Universal relationship types between code elements."""
 43 | 
 44 |     CONTAINS = "contains"
 45 |     INHERITS = "inherits"
 46 |     IMPLEMENTS = "implements"
 47 |     CALLS = "calls"
 48 |     IMPORTS = "imports"
 49 |     REFERENCES = "references"
 50 |     DEPENDS_ON = "depends_on"
 51 |     OVERRIDES = "overrides"
 52 |     EXTENDS = "extends"
 53 |     USES = "uses"
 54 | 
 55 | 
 56 | @dataclass
 57 | class UniversalLocation:
 58 |     """Universal location information for code elements."""
 59 | 
 60 |     file_path: str
 61 |     start_line: int
 62 |     end_line: int
 63 |     start_column: int = 0
 64 |     end_column: int = 0
 65 |     language: str = ""
 66 | 
 67 |     def __post_init__(self):
 68 |         """Validate location data after initialization."""
 69 |         if not self.file_path:
 70 |             raise ValueError("file_path cannot be empty")
 71 |         if self.start_line < 1:
 72 |             raise ValueError(f"start_line must be >= 1, got {self.start_line}")
 73 |         if self.end_line < self.start_line:
 74 |             raise ValueError(f"end_line ({self.end_line}) cannot be less than start_line ({self.start_line})")
 75 |         if self.start_column < 0:
 76 |             raise ValueError(f"start_column must be >= 0, got {self.start_column}")
 77 |         if self.end_column < 0:
 78 |             raise ValueError(f"end_column must be >= 0, got {self.end_column}")
 79 | 
 80 | 
 81 | @dataclass
 82 | class UniversalNode:
 83 |     """Universal representation of a code element."""
 84 | 
 85 |     id: str
 86 |     name: str
 87 |     node_type: NodeType
 88 |     location: UniversalLocation
 89 | 
 90 |     # Content and documentation
 91 |     content: str = ""
 92 |     docstring: Optional[str] = None
 93 | 
 94 |     # Code quality metrics
 95 |     complexity: int = 0
 96 |     line_count: int = 0
 97 | 
 98 |     # Language-specific metadata
 99 |     language: str = ""
100 |     metadata: Dict[str, Any] = field(default_factory=dict)
101 | 
102 |     # Visibility and access
103 |     visibility: str = "public"  # public, private, protected, internal
104 |     is_static: bool = False
105 |     is_abstract: bool = False
106 |     is_async: bool = False
107 | 
108 |     # Type information
109 |     return_type: Optional[str] = None
110 |     parameter_types: List[str] = field(default_factory=list)
111 | 
112 | 
113 | @dataclass
114 | class UniversalRelationship:
115 |     """Universal representation of relationships between code elements."""
116 | 
117 |     id: str
118 |     source_id: str
119 |     target_id: str
120 |     relationship_type: RelationshipType
121 | 
122 |     # Relationship metadata
123 |     metadata: Dict[str, Any] = field(default_factory=dict)
124 |     strength: float = 1.0  # Relationship strength (0.0 to 1.0)
125 | 
126 |     # Location where relationship is defined
127 |     location: Optional[UniversalLocation] = None
128 | 
129 | 
130 | class UniversalGraph:
131 |     """Universal code graph supporting multiple programming languages."""
132 | 
133 |     def __init__(self):
134 |         self.nodes: Dict[str, UniversalNode] = {}
135 |         self.relationships: Dict[str, UniversalRelationship] = {}
136 | 
137 |         # Indexed lookups for performance
138 |         self._nodes_by_type: Dict[NodeType, Set[str]] = {}
139 |         self._nodes_by_language: Dict[str, Set[str]] = {}
140 |         self._relationships_from: Dict[str, Set[str]] = {}
141 |         self._relationships_to: Dict[str, Set[str]] = {}
142 | 
143 |         # Graph metadata
144 |         self.metadata: Dict[str, Any] = {}
145 | 
146 |     def add_node(self, node: UniversalNode) -> None:
147 |         """Add a node to the graph with indexing."""
148 |         self.nodes[node.id] = node
149 | 
150 |         # Update indexes
151 |         if node.node_type not in self._nodes_by_type:
152 |             self._nodes_by_type[node.node_type] = set()
153 |         self._nodes_by_type[node.node_type].add(node.id)
154 | 
155 |         if node.language:
156 |             if node.language not in self._nodes_by_language:
157 |                 self._nodes_by_language[node.language] = set()
158 |             self._nodes_by_language[node.language].add(node.id)
159 | 
160 |     def add_relationship(self, relationship: UniversalRelationship) -> None:
161 |         """Add a relationship to the graph with indexing."""
162 |         self.relationships[relationship.id] = relationship
163 | 
164 |         # Update indexes
165 |         if relationship.source_id not in self._relationships_from:
166 |             self._relationships_from[relationship.source_id] = set()
167 |         self._relationships_from[relationship.source_id].add(relationship.id)
168 | 
169 |         if relationship.target_id not in self._relationships_to:
170 |             self._relationships_to[relationship.target_id] = set()
171 |         self._relationships_to[relationship.target_id].add(relationship.id)
172 | 
173 |     def get_node(self, node_id: str) -> Optional[UniversalNode]:
174 |         """Get a node by ID."""
175 |         return self.nodes.get(node_id)
176 | 
177 |     def get_nodes_by_type(self, node_type: NodeType) -> List[UniversalNode]:
178 |         """Get all nodes of a specific type."""
179 |         node_ids = self._nodes_by_type.get(node_type, set())
180 |         return [self.nodes[node_id] for node_id in node_ids if node_id in self.nodes]
181 | 
182 |     def get_nodes_by_language(self, language: str) -> List[UniversalNode]:
183 |         """Get all nodes for a specific language."""
184 |         node_ids = self._nodes_by_language.get(language, set())
185 |         return [self.nodes[node_id] for node_id in node_ids if node_id in self.nodes]
186 | 
187 |     def get_relationships_from(self, node_id: str) -> List[UniversalRelationship]:
188 |         """Get all relationships originating from a node."""
189 |         rel_ids = self._relationships_from.get(node_id, set())
190 |         return [self.relationships[rel_id] for rel_id in rel_ids if rel_id in self.relationships]
191 | 
192 |     def get_relationships_to(self, node_id: str) -> List[UniversalRelationship]:
193 |         """Get all relationships pointing to a node."""
194 |         rel_ids = self._relationships_to.get(node_id, set())
195 |         return [self.relationships[rel_id] for rel_id in rel_ids if rel_id in self.relationships]
196 | 
197 |     def get_relationships_by_type(self, relationship_type: RelationshipType) -> List[UniversalRelationship]:
198 |         """Get all relationships of a specific type."""
199 |         return [
200 |             rel for rel in self.relationships.values()
201 |             if rel.relationship_type == relationship_type
202 |         ]
203 | 
204 |     def find_nodes_by_name(self, name: str, exact_match: bool = True) -> List[UniversalNode]:
205 |         """Find nodes by name with optional fuzzy matching."""
206 |         if exact_match:
207 |             return [node for node in self.nodes.values() if node.name == name]
208 |         else:
209 |             name_lower = name.lower()
210 |             return [
211 |                 node for node in self.nodes.values()
212 |                 if name_lower in node.name.lower()
213 |             ]
214 | 
215 |     def get_connected_nodes(self, node_id: str, relationship_types: Optional[List[RelationshipType]] = None) -> List[UniversalNode]:
216 |         """Get all nodes connected to the given node."""
217 |         connected_ids = set()
218 | 
219 |         # Get outgoing relationships
220 |         for rel in self.get_relationships_from(node_id):
221 |             if not relationship_types or rel.relationship_type in relationship_types:
222 |                 connected_ids.add(rel.target_id)
223 | 
224 |         # Get incoming relationships
225 |         for rel in self.get_relationships_to(node_id):
226 |             if not relationship_types or rel.relationship_type in relationship_types:
227 |                 connected_ids.add(rel.source_id)
228 | 
229 |         return [self.nodes[node_id] for node_id in connected_ids if node_id in self.nodes]
230 | 
231 |     def get_statistics(self) -> Dict[str, Any]:
232 |         """Get comprehensive graph statistics."""
233 |         stats = {
234 |             "total_nodes": len(self.nodes),
235 |             "total_relationships": len(self.relationships),
236 |             "nodes_by_type": {},
237 |             "nodes_by_language": {},
238 |             "relationships_by_type": {},
239 |             "complexity_stats": {
240 |                 "total_complexity": 0,
241 |                 "average_complexity": 0.0,
242 |                 "max_complexity": 0,
243 |                 "high_complexity_functions": 0
244 |             }
245 |         }
246 | 
247 |         # Count nodes by type
248 |         for node_type, node_ids in self._nodes_by_type.items():
249 |             stats["nodes_by_type"][node_type.value] = len(node_ids)
250 | 
251 |         # Count nodes by language
252 |         for language, node_ids in self._nodes_by_language.items():
253 |             stats["nodes_by_language"][language] = len(node_ids)
254 | 
255 |         # Count relationships by type
256 |         for rel in self.relationships.values():
257 |             rel_type = rel.relationship_type.value
258 |             stats["relationships_by_type"][rel_type] = stats["relationships_by_type"].get(rel_type, 0) + 1
259 | 
260 |         # Calculate complexity statistics
261 |         complexities = [node.complexity for node in self.nodes.values() if node.complexity > 0]
262 |         if complexities:
263 |             stats["complexity_stats"]["total_complexity"] = sum(complexities)
264 |             stats["complexity_stats"]["average_complexity"] = sum(complexities) / len(complexities)
265 |             stats["complexity_stats"]["max_complexity"] = max(complexities)
266 |             stats["complexity_stats"]["high_complexity_functions"] = len([c for c in complexities if c > 10])
267 | 
268 |         return stats
269 | 
270 |     def export_graph_data(self) -> Dict[str, Any]:
271 |         """Export complete graph data for serialization."""
272 |         return {
273 |             "nodes": [
274 |                 {
275 |                     "id": node.id,
276 |                     "name": node.name,
277 |                     "type": node.node_type.value,
278 |                     "language": node.language,
279 |                     "location": {
280 |                         "file": node.location.file_path,
281 |                         "start_line": node.location.start_line,
282 |                         "end_line": node.location.end_line,
283 |                         "start_column": node.location.start_column,
284 |                         "end_column": node.location.end_column
285 |                     },
286 |                     "complexity": node.complexity,
287 |                     "line_count": node.line_count,
288 |                     "docstring": node.docstring,
289 |                     "visibility": node.visibility,
290 |                     "is_static": node.is_static,
291 |                     "is_abstract": node.is_abstract,
292 |                     "is_async": node.is_async,
293 |                     "return_type": node.return_type,
294 |                     "parameter_types": node.parameter_types,
295 |                     "metadata": node.metadata
296 |                 }
297 |                 for node in self.nodes.values()
298 |             ],
299 |             "relationships": [
300 |                 {
301 |                     "id": rel.id,
302 |                     "source_id": rel.source_id,
303 |                     "target_id": rel.target_id,
304 |                     "type": rel.relationship_type.value,
305 |                     "strength": rel.strength,
306 |                     "location": {
307 |                         "file": rel.location.file_path,
308 |                         "start_line": rel.location.start_line,
309 |                         "end_line": rel.location.end_line
310 |                     } if rel.location else None,
311 |                     "metadata": rel.metadata
312 |                 }
313 |                 for rel in self.relationships.values()
314 |             ],
315 |             "statistics": self.get_statistics(),
316 |             "metadata": self.metadata
317 |         }
318 | 
319 | 


--------------------------------------------------------------------------------
/tests/test_multi_language.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Comprehensive tests for the multi-language code graph transformation.
  4 | Tests the universal parser, graph structures, and language detection.
  5 | """
  6 | 
  7 | import pytest
  8 | from pathlib import Path
  9 | import tempfile
 10 | import shutil
 11 | 
 12 | from src.code_graph_mcp.universal_parser import UniversalParser, LanguageRegistry
 13 | from src.code_graph_mcp.universal_graph import UniversalGraph, NodeType
 14 | from src.code_graph_mcp.universal_ast import UniversalASTAnalyzer
 15 | from src.code_graph_mcp.language_router import LanguageDetector, ProjectAnalyzer
 16 | 
 17 | 
 18 | class TestLanguageSupport:
 19 |     """Test multi-language support capabilities."""
 20 | 
 21 |     def test_language_registry_completeness(self):
 22 |         """Test that language registry supports 25+ languages."""
 23 |         registry = LanguageRegistry()
 24 | 
 25 |         assert registry.get_language_count() >= 25
 26 | 
 27 |         # Test specific languages are supported
 28 |         expected_languages = [
 29 |             'javascript', 'typescript', 'python', 'java', 'csharp',
 30 |             'cpp', 'c', 'rust', 'go', 'kotlin', 'scala', 'swift',
 31 |             'dart', 'ruby', 'php', 'elixir', 'elm', 'lua', 'html',
 32 |             'css', 'sql', 'yaml', 'json', 'xml', 'markdown'
 33 |         ]
 34 | 
 35 |         for lang in expected_languages:
 36 |             assert lang in registry.LANGUAGES, f"Missing language: {lang}"
 37 | 
 38 |     def test_file_extension_detection(self):
 39 |         """Test language detection by file extension."""
 40 |         registry = LanguageRegistry()
 41 | 
 42 |         test_cases = [
 43 |             ('.py', 'python'),
 44 |             ('.js', 'javascript'),
 45 |             ('.ts', 'typescript'),
 46 |             ('.java', 'java'),
 47 |             ('.rs', 'rust'),
 48 |             ('.go', 'go'),
 49 |             ('.cpp', 'cpp'),
 50 |             ('.html', 'html'),
 51 |             ('.css', 'css'),
 52 |             ('.json', 'json'),
 53 |         ]
 54 | 
 55 |         for ext, expected_lang in test_cases:
 56 |             file_path = Path(f"test{ext}")
 57 |             config = registry.get_language_by_extension(file_path)
 58 |             assert config is not None, f"No config found for {ext}"
 59 |             assert expected_lang in config.name.lower() or (expected_lang == 'cpp' and 'c++' in config.name.lower()), f"Wrong language for {ext}: got {config.name}"
 60 | 
 61 | 
 62 | class TestUniversalParser:
 63 |     """Test the universal parser with multiple languages."""
 64 | 
 65 |     @pytest.fixture
 66 |     def temp_project(self):
 67 |         """Create a temporary multi-language project."""
 68 |         temp_dir = Path(tempfile.mkdtemp())
 69 | 
 70 |         # Create test files in different languages
 71 |         test_files = {
 72 |             'main.py': '''
 73 | def hello_world():
 74 |     """Say hello to the world."""
 75 |     print("Hello from Python!")
 76 | 
 77 | class Calculator:
 78 |     def add(self, a, b):
 79 |         return a + b
 80 | ''',
 81 |             'app.js': '''
 82 | function helloWorld() {
 83 |     console.log("Hello from JavaScript!");
 84 | }
 85 | 
 86 | class Calculator {
 87 |     add(a, b) {
 88 |         return a + b;
 89 |     }
 90 | }
 91 | ''',
 92 |             'Main.java': '''
 93 | public class Main {
 94 |     public static void main(String[] args) {
 95 |         System.out.println("Hello from Java!");
 96 |     }
 97 | 
 98 |     public static class Calculator {
 99 |         public int add(int a, int b) {
100 |             return a + b;
101 |         }
102 |     }
103 | }
104 | ''',
105 |             'hello.rs': '''
106 | fn main() {
107 |     println!("Hello from Rust!");
108 | }
109 | 
110 | struct Calculator;
111 | 
112 | impl Calculator {
113 |     fn add(&self, a: i32, b: i32) -> i32 {
114 |         a + b
115 |     }
116 | }
117 | '''
118 |         }
119 | 
120 |         for filename, content in test_files.items():
121 |             file_path = temp_dir / filename
122 |             file_path.write_text(content)
123 | 
124 |         yield temp_dir
125 | 
126 |         # Cleanup
127 |         shutil.rmtree(temp_dir)
128 | 
129 |     def test_single_file_parsing(self, temp_project):
130 |         """Test parsing individual files in different languages."""
131 |         parser = UniversalParser(temp_project)
132 | 
133 |         # Test Python file
134 |         python_file = temp_project / 'main.py'
135 |         python_graph = parser.parse_file(python_file)
136 | 
137 |         assert python_graph is not None
138 |         assert len(python_graph.nodes) > 0
139 |         assert 'python' in python_graph.languages
140 | 
141 |         # Check for functions and classes
142 |         functions = python_graph.get_nodes_by_type(NodeType.FUNCTION)
143 |         classes = python_graph.get_nodes_by_type(NodeType.CLASS)
144 | 
145 |         assert len(functions) >= 1  # hello_world and add methods
146 |         assert len(classes) >= 1    # Calculator class
147 | 
148 |     def test_directory_parsing(self, temp_project):
149 |         """Test parsing entire multi-language directory."""
150 |         parser = UniversalParser(temp_project)
151 | 
152 |         combined_graph = parser.parse_directory()
153 | 
154 |         # Should have parsed multiple languages
155 |         assert len(combined_graph.languages) >= 3  # Python, JavaScript, Java, Rust
156 |         assert combined_graph.file_count >= 4
157 | 
158 |         # Should have nodes from all languages
159 |         total_nodes = len(combined_graph.nodes)
160 |         assert total_nodes > 10  # Multiple functions and classes across languages
161 | 
162 |         # Test language distribution
163 |         assert 'python' in combined_graph.languages
164 |         assert 'javascript' in combined_graph.languages
165 |         assert 'java' in combined_graph.languages
166 | 
167 | 
168 | class TestLanguageDetection:
169 |     """Test intelligent language detection."""
170 | 
171 |     def test_extension_detection(self):
172 |         """Test detection by file extension."""
173 |         detector = LanguageDetector()
174 | 
175 |         test_cases = [
176 |             ('test.py', 'Python'),
177 |             ('app.js', 'JavaScript'),
178 |             ('Main.java', 'Java'),
179 |             ('hello.rs', 'Rust'),
180 |             ('main.go', 'Go'),
181 |         ]
182 | 
183 |         for filename, expected_lang in test_cases:
184 |             file_path = Path(filename)
185 |             config = detector.detect_file_language(file_path)
186 |             assert config is not None
187 |             assert expected_lang.lower() in config.name.lower()
188 | 
189 |     def test_content_signature_detection(self):
190 |         """Test detection by content patterns."""
191 |         detector = LanguageDetector()
192 | 
193 |         # Test Python content
194 |         python_content = '''
195 | def main():
196 |     import os
197 |     print("Hello Python")
198 |     if __name__ == "__main__":
199 |         main()
200 | '''
201 | 
202 |         detected = detector._detect_by_content_signatures(python_content)
203 |         assert detected == 'python'
204 | 
205 |         # Test JavaScript content
206 |         js_content = '''
207 | function main() {
208 |     const message = "Hello JavaScript";
209 |     console.log(message);
210 | }
211 | module.exports = main;
212 | '''
213 | 
214 |         detected = detector._detect_by_content_signatures(js_content)
215 |         assert detected == 'javascript'
216 | 
217 | 
218 | class TestUniversalGraph:
219 |     """Test universal graph structures work across languages."""
220 | 
221 |     def test_node_creation(self):
222 |         """Test creating universal nodes for different languages."""
223 |         from src.code_graph_mcp.universal_graph import UniversalNode, SourceLocation
224 | 
225 |         # Create nodes for different languages
226 |         python_node = UniversalNode(
227 |             id="py_func_1",
228 |             node_type=NodeType.FUNCTION,
229 |             name="calculate",
230 |             qualified_name="math.calculate",
231 |             location=SourceLocation(Path("test.py"), 1, 1, 10, 1),
232 |             language="python",
233 |             raw_kind="function_definition"
234 |         )
235 | 
236 |         js_node = UniversalNode(
237 |             id="js_func_1",
238 |             node_type=NodeType.FUNCTION,
239 |             name="calculate",
240 |             qualified_name="math.calculate",
241 |             location=SourceLocation(Path("test.js"), 1, 1, 10, 1),
242 |             language="javascript",
243 |             raw_kind="function_declaration"
244 |         )
245 | 
246 |         # Both should have same universal type despite different raw kinds
247 |         assert python_node.node_type == js_node.node_type
248 |         assert python_node.node_type == NodeType.FUNCTION
249 | 
250 |     def test_graph_multi_language_operations(self):
251 |         """Test graph operations work across multiple languages."""
252 |         from src.code_graph_mcp.universal_graph import UniversalNode, SourceLocation
253 | 
254 |         graph = UniversalGraph()
255 | 
256 |         # Add nodes from different languages
257 |         languages = ['python', 'javascript', 'java', 'rust']
258 |         for i, lang in enumerate(languages):
259 |             node = UniversalNode(
260 |                 id=f"{lang}_node_{i}",
261 |                 node_type=NodeType.FUNCTION,
262 |                 name=f"func_{i}",
263 |                 qualified_name=f"module.func_{i}",
264 |                 location=SourceLocation(Path(f"test.{lang[:2]}"), 1, 1, 5, 1),
265 |                 language=lang,
266 |                 raw_kind="function"
267 |             )
268 |             graph.add_node(node)
269 | 
270 |         # Test multi-language queries
271 |         assert len(graph.languages) == 4
272 |         assert graph.get_nodes_by_language('python')
273 |         assert graph.get_nodes_by_language('javascript')
274 |         assert len(graph.get_nodes_by_type(NodeType.FUNCTION)) == 4
275 | 
276 | 
277 | class TestUniversalASTAnalyzer:
278 |     """Test cross-language AST analysis capabilities."""
279 | 
280 |     @pytest.fixture
281 |     def sample_graph(self):
282 |         """Create a sample multi-language graph."""
283 |         from src.code_graph_mcp.universal_graph import UniversalNode, SourceLocation
284 | 
285 |         graph = UniversalGraph()
286 | 
287 |         # Add some test nodes
288 |         for i in range(5):
289 |             node = UniversalNode(
290 |                 id=f"func_{i}",
291 |                 node_type=NodeType.FUNCTION,
292 |                 name=f"function_{i}",
293 |                 qualified_name=f"module.function_{i}",
294 |                 location=SourceLocation(Path("test.py"), i, 1, i+5, 1),
295 |                 language="python",
296 |                 raw_kind="function_definition",
297 |                 complexity=i * 3 + 1,  # Varying complexity
298 |                 line_count=i * 10 + 5   # Varying size
299 |             )
300 |             graph.add_node(node)
301 | 
302 |         return graph
303 | 
304 |     def test_code_smell_detection(self, sample_graph):
305 |         """Test cross-language code smell detection."""
306 |         from src.code_graph_mcp.universal_parser import UniversalParser
307 | 
308 |         parser = UniversalParser(Path("."))
309 |         analyzer = UniversalASTAnalyzer(parser)
310 | 
311 |         smells = analyzer.detect_code_smells(sample_graph)
312 | 
313 |         # Should detect different types of smells
314 |         assert 'long_functions' in smells
315 |         assert 'complex_functions' in smells
316 |         assert 'large_classes' in smells
317 | 
318 |         # Should have some complex functions (complexity > 15)
319 |         complex_funcs = [node for node in sample_graph.nodes.values() if node.complexity > 15]
320 |         assert len(smells['complex_functions']) == len(complex_funcs)
321 | 
322 |     def test_maintainability_calculation(self, sample_graph):
323 |         """Test maintainability index calculation."""
324 |         from src.code_graph_mcp.universal_parser import UniversalParser
325 | 
326 |         parser = UniversalParser(Path("."))
327 |         analyzer = UniversalASTAnalyzer(parser)
328 | 
329 |         maintainability = analyzer.calculate_maintainability_index(sample_graph)
330 | 
331 |         # Should return a score between 0 and 100
332 |         assert 0 <= maintainability <= 100
333 |         assert isinstance(maintainability, float)
334 | 
335 | 
336 | class TestProjectAnalysis:
337 |     """Test project-level multi-language analysis."""
338 | 
339 |     @pytest.fixture
340 |     def complex_project(self):
341 |         """Create a complex multi-language project structure."""
342 |         temp_dir = Path(tempfile.mkdtemp())
343 | 
344 |         # Create directory structure
345 |         (temp_dir / 'src').mkdir()
346 |         (temp_dir / 'tests').mkdir()
347 |         (temp_dir / 'docs').mkdir()
348 | 
349 |         # Create files
350 |         files = {
351 |             'package.json': '{"name": "test", "dependencies": {"react": "^18.0.0"}}',
352 |             'src/main.py': 'def main(): pass',
353 |             'src/app.js': 'function app() {}',
354 |             'src/Main.java': 'public class Main {}',
355 |             'tests/test_main.py': 'def test_main(): assert True',
356 |             'docs/README.md': '# Test Project',
357 |             '.github/workflows/ci.yml': 'name: CI'
358 |         }
359 | 
360 |         for filepath, content in files.items():
361 |             full_path = temp_dir / filepath
362 |             full_path.parent.mkdir(parents=True, exist_ok=True)
363 |             full_path.write_text(content)
364 | 
365 |         yield temp_dir
366 |         shutil.rmtree(temp_dir)
367 | 
368 |     def test_project_analysis(self, complex_project):
369 |         """Test comprehensive project analysis."""
370 |         analyzer = ProjectAnalyzer()
371 |         profile = analyzer.analyze_project(complex_project)
372 | 
373 |         # Should detect multiple languages
374 |         assert len(profile.languages) >= 3
375 |         assert 'python' in profile.languages
376 |         assert 'javascript' in profile.languages
377 |         assert 'java' in profile.languages
378 | 
379 |         # Should detect frameworks
380 |         assert 'react' in profile.framework_hints or 'npm' in profile.framework_hints
381 | 
382 |         # Should detect project structure
383 |         assert profile.has_tests
384 |         assert profile.has_documentation
385 |         assert profile.has_ci_config
386 | 
387 |         # Should have reasonable confidence
388 |         assert profile.confidence_score > 0.5
389 | 
390 | 
391 | def test_integration_end_to_end():
392 |     """Integration test of the entire multi-language pipeline."""
393 |     # Create temporary project
394 |     temp_dir = Path(tempfile.mkdtemp())
395 | 
396 |     try:
397 |         # Create multi-language files
398 |         files = {
399 |             'main.py': 'def hello(): print("Python")',
400 |             'app.js': 'function hello() { console.log("JS"); }',
401 |             'Main.java': 'class Main { void hello() { System.out.println("Java"); } }'
402 |         }
403 | 
404 |         for filename, content in files.items():
405 |             (temp_dir / filename).write_text(content)
406 | 
407 |         # Test complete pipeline
408 |         parser = UniversalParser(temp_dir)
409 |         graph = parser.parse_directory()
410 | 
411 |         # Verify multi-language support works end-to-end
412 |         assert len(graph.languages) >= 3
413 |         assert len(graph.nodes) >= 6  # 3 files * ~2 nodes each
414 | 
415 |         # Test analysis
416 |         analyzer = UniversalASTAnalyzer(parser)
417 |         functions = graph.get_nodes_by_type(NodeType.FUNCTION)
418 | 
419 |         if functions:
420 |             result = analyzer.analyze_function(functions[0])
421 |             assert result.node is not None
422 |             assert result.complexity >= 1
423 | 
424 |     finally:
425 |         shutil.rmtree(temp_dir)
426 | 
427 | 
428 | if __name__ == "__main__":
429 |     pytest.main([__file__, "-v"])
430 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Code Graph MCP Server
  2 | 
  3 | Model Context Protocol server providing comprehensive code analysis, navigation, and quality assessment capabilities **across 25+ programming languages**.
  4 | 
  5 | ## Features
  6 | 
  7 | 🎯 **Enhanced Tool Guidance & AI Optimization** ⭐ *NEW in v1.2.0*
  8 | - **Comprehensive Usage Guide** - Built-in `get_usage_guide` tool with workflows, best practices, and examples
  9 | - **Rich Tool Descriptions** - Visual hierarchy with 🎯 PURPOSE, 🔧 USAGE, ⚡ PERFORMANCE, 🔄 WORKFLOW, 💡 TIP sections
 10 | - **Performance-Aware Design** - Clear expectations for Fast (<3s), Moderate (3-15s), and Expensive (10-60s) operations
 11 | - **Workflow Orchestration** - Optimal tool sequences for Code Exploration, Refactoring Analysis, and Architecture Analysis
 12 | - **AI Model Optimization** - Reduces trial-and-error, improves tool orchestration, enables strategic usage patterns
 13 | 
 14 | 🌍 **Multi-Language Support**
 15 | - **25+ Programming Languages**: JavaScript, TypeScript, Python, Java, C#, C++, C, Rust, Go, Kotlin, Scala, Swift, Dart, Ruby, PHP, Elixir, Elm, Lua, HTML, CSS, SQL, YAML, JSON, XML, Markdown, Haskell, OCaml, F#
 16 | - **Intelligent Language Detection**: Extension-based, MIME type, shebang, and content signature analysis
 17 | - **Framework Recognition**: React, Angular, Vue, Django, Flask, Spring, and 15+ more
 18 | - **Universal AST Abstraction**: Language-agnostic code analysis and graph structures
 19 | 
 20 | 🔍 **Advanced Code Analysis**
 21 | - Complete codebase structure analysis with metrics across all languages
 22 | - Universal AST parsing with ast-grep backend and intelligent caching
 23 | - Cyclomatic complexity calculation with language-specific patterns
 24 | - Project health scoring and maintainability indexing
 25 | - Code smell detection: long functions, complex logic, duplicate patterns
 26 | - Cross-language similarity analysis and pattern matching
 27 | 
 28 | 🧭 **Navigation & Search**
 29 | - Symbol definition lookup across mixed-language codebases
 30 | - Reference tracking across files and languages
 31 | - Function caller/callee analysis with cross-language calls
 32 | - Dependency mapping and circular dependency detection
 33 | - Call graph generation across entire project
 34 | 
 35 | ⚡ **Performance Optimized**
 36 | - **Debounced File Watcher** - Automatic re-analysis when files change with 2-second intelligent debouncing
 37 | - **Real-time Updates** - Code graph automatically updates during active development
 38 | - Aggressive LRU caching with 50-90% speed improvements on repeated operations
 39 | - Cache sizes optimized for 500+ file codebases (up to 300K entries)
 40 | - Sub-microsecond response times on cache hits
 41 | - Memory-efficient universal graph building
 42 | 
 43 | 🏢 **Enterprise Ready**
 44 | - Production-quality error handling across all languages
 45 | - Comprehensive logging and monitoring with language context
 46 | - UV package management with ast-grep integration
 47 | 
 48 | ## Installation
 49 | 
 50 | ### Quick Start (PyPI)
 51 | 
 52 | ```bash
 53 | pip install code-graph-mcp ast-grep-py rustworkx
 54 | ```
 55 | 
 56 | ## MCP Host Integration
 57 | 
 58 | ### Claude Desktop
 59 | 
 60 | #### Method 1: Using Claude CLI (Recommended)
 61 | 
 62 | **For PyPI installation:**
 63 | ```bash
 64 | # Project-specific installation
 65 | claude mcp add --scope project code-graph-mcp code-graph-mcp
 66 | 
 67 | # User-wide installation  
 68 | claude mcp add --scope user code-graph-mcp code-graph-mcp
 69 | ```
 70 | 
 71 | **For development installation:**
 72 | ```bash
 73 | # Project-specific installation
 74 | claude mcp add --scope project code-graph-mcp uv run code-graph-mcp
 75 | 
 76 | # User-wide installation  
 77 | claude mcp add --scope user code-graph-mcp uv run code-graph-mcp
 78 | ```
 79 | 
 80 | **Verify installation:**
 81 | ```bash
 82 | claude mcp list
 83 | ```
 84 | 
 85 | #### Method 2: Manual Configuration
 86 | Add to your Claude Desktop configuration file:
 87 | 
 88 | **macOS**: `~/Library/Application Support/Claude/claude_desktop_config.json`
 89 | **Windows**: `%APPDATA%\Claude\claude_desktop_config.json`
 90 | 
 91 | ```json
 92 | {
 93 |   "mcpServers": {
 94 |     "code-graph-mcp": {
 95 |       "command": "code-graph-mcp"
 96 |     }
 97 |   }
 98 | }
 99 | ```
100 | 
101 | ### Cline (VS Code Extension)
102 | 
103 | Add to your Cline MCP settings in VS Code:
104 | 
105 | 1. Open VS Code Settings (Ctrl/Cmd + ,)
106 | 2. Search for "Cline MCP"
107 | 3. Add server configuration:
108 | 
109 | ```json
110 | {
111 |   "cline.mcp.servers": {
112 |     "code-graph-mcp": {
113 |       "command": "code-graph-mcp"
114 |     }
115 |   }
116 | }
117 | ```
118 | 
119 | ### Continue (VS Code Extension)
120 | 
121 | Add to your `~/.continue/config.json`:
122 | 
123 | ```json
124 | {
125 |   "mcpServers": [
126 |     {
127 |       "name": "code-graph-mcp",
128 |       "command": "code-graph-mcp",
129 |       "env": {}
130 |     }
131 |   ]
132 | }
133 | ```
134 | 
135 | ### Cursor
136 | 
137 | Add to Cursor's MCP configuration:
138 | 
139 | 1. Open Cursor Settings
140 | 2. Navigate to Extensions → MCP
141 | 3. Add server:
142 | 
143 | ```json
144 | {
145 |   "name": "code-graph-mcp",
146 |   "command": "code-graph-mcp"
147 | }
148 | ```
149 | 
150 | ### Zed Editor
151 | 
152 | Add to your Zed `settings.json`:
153 | 
154 | ```json
155 | {
156 |   "assistant": {
157 |     "mcp_servers": {
158 |       "code-graph-mcp": {
159 |         "command": "code-graph-mcp"
160 |       }
161 |     }
162 |   }
163 | }
164 | ```
165 | 
166 | ### Zencoder ⭐
167 | 
168 | **The best AI coding tool!** Add to your Zencoder MCP configuration:
169 | 
170 | ```json
171 | {
172 |   "mcpServers": {
173 |     "code-graph-mcp": {
174 |       "command": "code-graph-mcp",
175 |       "env": {},
176 |       "description": "Multi-language code analysis with 25+ language support"
177 |     }
178 |   }
179 | }
180 | ```
181 | 
182 | **Pro Tip**: Zencoder's advanced AI capabilities work exceptionally well with Code Graph MCP's comprehensive multi-language analysis. Perfect combination for professional development! 🚀
183 | 
184 | ### Windsurf
185 | 
186 | Add to Windsurf's MCP configuration:
187 | 
188 | ```json
189 | {
190 |   "mcpServers": {
191 |     "code-graph-mcp": {
192 |       "command": "code-graph-mcp"
193 |     }
194 |   }
195 | }
196 | ```
197 | 
198 | ### Aider
199 | 
200 | Use with Aider AI coding assistant:
201 | 
202 | ```bash
203 | aider --mcp-server code-graph-mcp
204 | ```
205 | 
206 | ### Open WebUI
207 | 
208 | For Open WebUI integration, add to your MCP configuration:
209 | 
210 | ```json
211 | {
212 |   "mcp_servers": {
213 |     "code-graph-mcp": {
214 |       "command": "code-graph-mcp",
215 |       "env": {}
216 |     }
217 |   }
218 | }
219 | ```
220 | 
221 | ### Generic MCP Client
222 | 
223 | For any MCP-compatible client, use these connection details:
224 | 
225 | ```json
226 | {
227 |   "name": "code-graph-mcp",
228 |   "command": "code-graph-mcp",
229 |   "env": {}
230 | }
231 | ```
232 | 
233 | ### Docker Integration
234 | 
235 | Run as a containerized MCP server:
236 | 
237 | ```dockerfile
238 | FROM python:3.12-slim
239 | RUN pip install code-graph-mcp ast-grep-py rustworkx
240 | WORKDIR /workspace
241 | CMD ["code-graph-mcp"]
242 | ```
243 | 
244 | ```bash
245 | docker run -v $(pwd):/workspace code-graph-mcp
246 | ```
247 | 
248 | ### Development Installation
249 | 
250 | For contributing or custom builds:
251 | 
252 | ```bash
253 | git clone <repository-url>
254 | cd code-graph-mcp
255 | uv sync --dev
256 | uv build
257 | ```
258 | 
259 | **Add to Claude Code (development):**
260 | ```bash
261 | # Project-specific
262 | claude mcp add --scope project code-graph-mcp uv run code-graph-mcp
263 | 
264 | # User-wide
265 | claude mcp add --scope user code-graph-mcp uv run code-graph-mcp
266 | ```
267 | 
268 | **For other MCP clients, use:**
269 | ```json
270 | {
271 |   "command": "uv",
272 |   "args": ["run", "code-graph-mcp"]
273 | }
274 | ```
275 | 
276 | ## Configuration Options
277 | 
278 | ### Command Line Arguments
279 | 
280 | ```bash
281 | code-graph-mcp --help
282 | ```
283 | 
284 | Available options:
285 | - `--project-root PATH`: Root directory of your project (optional, defaults to current directory)
286 | - `--verbose`: Enable detailed logging
287 | - `--no-file-watcher`: Disable automatic file change detection
288 | 
289 | ### Environment Variables
290 | 
291 | ```bash
292 | export CODE_GRAPH_MCP_LOG_LEVEL=DEBUG
293 | export CODE_GRAPH_MCP_CACHE_SIZE=500000
294 | export CODE_GRAPH_MCP_MAX_FILES=10000
295 | export CODE_GRAPH_MCP_FILE_WATCHER=true
296 | export CODE_GRAPH_MCP_DEBOUNCE_DELAY=2.0
297 | ```
298 | 
299 | ### File Watcher (v1.1.0+)
300 | 
301 | The server includes an intelligent file watcher that automatically updates the code graph when files change:
302 | 
303 | - **Automatic Detection**: Monitors all supported file types in your project
304 | - **Smart Debouncing**: 2-second delay prevents excessive re-analysis during rapid changes
305 | - **Efficient Filtering**: Respects `.gitignore` patterns and only watches relevant files
306 | - **Thread-Safe**: Runs in background without blocking analysis operations
307 | - **Zero Configuration**: Starts automatically after first analysis
308 | 
309 | **File Watcher Features:**
310 | - Real-time graph updates during development
311 | - Batch processing of multiple rapid changes
312 | - Duplicate change prevention
313 | - Graceful error recovery
314 | - Resource cleanup on shutdown
315 | 
316 | ### Troubleshooting
317 | 
318 | #### Common Issues
319 | 
320 | 1. **"Command not found"**: Ensure `code-graph-mcp` is in your PATH
321 |    ```bash
322 |    pip install --upgrade code-graph-mcp
323 |    which code-graph-mcp
324 |    ```
325 | 
326 | 2. **"ast-grep not found"**: Install the required dependency
327 |    ```bash
328 |    pip install ast-grep-py
329 |    ```
330 | 
331 | 3. **Permission errors**: Use virtual environment
332 |    ```bash
333 |    python -m venv venv
334 |    source venv/bin/activate  # Linux/Mac
335 |    # or
336 |    venv\Scripts\activate     # Windows
337 |    pip install code-graph-mcp ast-grep-py rustworkx
338 |    ```
339 | 
340 | 4. **Large project performance**: Use verbose mode for debugging
341 |    ```bash
342 |    code-graph-mcp --verbose
343 |    ```
344 | 
345 | #### Debug Mode
346 | 
347 | Enable verbose logging for troubleshooting:
348 | 
349 | ```bash
350 | code-graph-mcp --verbose
351 | ```
352 | 
353 | #### Supported File Types
354 | 
355 | The server automatically detects and analyzes these file extensions:
356 | - **Web**: `.js`, `.ts`, `.jsx`, `.tsx`, `.html`, `.css`
357 | - **Backend**: `.py`, `.java`, `.cs`, `.cpp`, `.c`, `.rs`, `.go`
358 | - **Mobile**: `.swift`, `.dart`, `.kt`
359 | - **Scripting**: `.rb`, `.php`, `.lua`, `.pl`
360 | - **Config**: `.json`, `.yaml`, `.yml`, `.toml`, `.xml`
361 | - **Docs**: `.md`, `.rst`, `.txt`
362 | 
363 | ## Available Tools
364 | 
365 | The MCP server provides **9 comprehensive analysis tools** with enhanced guidance that work across all 25+ supported languages:
366 | 
367 | ### 🎯 **Enhanced Tool Experience** ⭐ *NEW in v1.2.0*
368 | 
369 | Each tool now includes **rich guidance** with visual hierarchy:
370 | - **🎯 PURPOSE** - Clear explanation of what the tool does
371 | - **🔧 USAGE** - When and how to use the tool effectively  
372 | - **⚡ PERFORMANCE** - Speed expectations and caching information
373 | - **🔄 WORKFLOW** - Optimal tool sequencing recommendations
374 | - **💡 TIP** - Pro tips for maximum effectiveness
375 | 
376 | ### 📚 **Usage Guide Tool**
377 | | Tool | Description | Key Features |
378 | |------|-------------|--------------|
379 | | `get_usage_guide` | **NEW** - Comprehensive guidance with workflows, best practices, and examples | Complete documentation, workflow patterns, performance guidelines |
380 | 
381 | ### 🛠️ **Analysis Tools**
382 | | Tool | Description | Multi-Language Features | Performance |
383 | |------|-------------|------------------------|-------------|
384 | | `analyze_codebase` | Complete project analysis with structure metrics and complexity assessment | Language detection, framework identification, cross-language dependency mapping | ⚡ Expensive (10-60s) |
385 | | `find_definition` | Locate symbol definitions with detailed metadata and documentation | Universal AST traversal, language-agnostic symbol resolution | ⚡ Fast (<3s) |
386 | | `find_references` | Find all references to symbols throughout the codebase | Cross-file and cross-language reference tracking | ⚡ Fast (<3s) |
387 | | `find_callers` | Identify all functions that call a specified function | Multi-language call graph analysis | ⚡ Fast (<3s) |
388 | | `find_callees` | List all functions called by a specified function | Universal function call detection across languages | ⚡ Fast (<3s) |
389 | | `complexity_analysis` | Analyze code complexity with refactoring recommendations | Language-specific complexity patterns, universal metrics | ⚡ Moderate (5-15s) |
390 | | `dependency_analysis` | Generate module dependency graphs and import relationships | Cross-language dependency detection, circular dependency analysis | ⚡ Moderate (3-10s) |
391 | | `project_statistics` | Comprehensive project health metrics and statistics | Multi-language project profiling, maintainability indexing | ⚡ Fast (<3s) |
392 | 
393 | ## Usage Examples
394 | 
395 | ### 🎯 **Getting Started with Enhanced Guidance** ⭐ *NEW in v1.2.0*
396 | 
397 | ```
398 | First, get comprehensive guidance on using the tools effectively:
399 | get_usage_guide
400 | ```
401 | 
402 | ### 🔍 **Multi-Language Analysis Workflows**
403 | 
404 | **Code Exploration Workflow:**
405 | ```
406 | 1. analyze_codebase (build the foundation)
407 | 2. project_statistics (get overview)  
408 | 3. find_definition("MyClass") (locate specific symbols)
409 | 4. find_references("MyClass") (understand usage patterns)
410 | ```
411 | 
412 | **Refactoring Analysis Workflow:**
413 | ```
414 | 1. analyze_codebase
415 | 2. complexity_analysis (threshold=15 for critical issues)
416 | 3. find_callers("complex_function") (impact analysis)
417 | 4. find_callees("complex_function") (dependency analysis)
418 | ```
419 | 
420 | **Architecture Analysis Workflow:**
421 | ```
422 | 1. analyze_codebase
423 | 2. dependency_analysis (identify circular dependencies)
424 | 3. project_statistics (health metrics)
425 | 4. complexity_analysis (quality assessment)
426 | ```
427 | 
428 | ### 💬 **Natural Language Examples**
429 | 
430 | ```
431 | Analyze this React/TypeScript frontend with Python backend - show me the overall structure and complexity metrics
432 | ```
433 | 
434 | ```
435 | Find all references to the function "authenticate" across both the Java services and JavaScript frontend
436 | ```
437 | 
438 | ```
439 | Show me functions with complexity higher than 15 across all languages that need refactoring
440 | ```
441 | 
442 | ```
443 | Generate a dependency graph showing how the Python API connects to the React components
444 | ```
445 | 
446 | ```
447 | Detect code smells and duplicate patterns across the entire multi-language codebase
448 | ```
449 | 
450 | ## Development
451 | 
452 | ### Requirements
453 | - Python 3.12+
454 | - UV package manager
455 | - MCP SDK
456 | - ast-grep-py (for multi-language support)
457 | - rustworkx (for high-performance graph operations)
458 | 
459 | ### Running locally
460 | ```bash
461 | # Install dependencies
462 | uv sync
463 | 
464 | # Run the server directly (auto-detects current directory)
465 | uv run code-graph-mcp --verbose
466 | 
467 | # Test with help
468 | uv run code-graph-mcp --help
469 | ```
470 | 
471 | ### Performance Features
472 | 
473 | - **LRU Caching**: 50-90% speed improvements with cache sizes up to 300K entries for large codebases
474 | - **High-Performance Analytics**: PageRank at 4.9M nodes/second, Betweenness Centrality at 104K nodes/second
475 | - **Sub-microsecond Response**: Cache hits deliver sub-microsecond response times for repeated operations
476 | - **Memory Optimized**: Cache configurations optimized for 500+ file codebases with 500MB memory allocation
477 | - **Comprehensive Benchmarks**: Performance monitoring with detailed cache effectiveness metrics
478 | 
479 | ## Supported Languages
480 | 
481 | | Category | Languages | Count |
482 | |----------|-----------|-------|
483 | | **Web & Frontend** | JavaScript, TypeScript, HTML, CSS | 4 |
484 | | **Backend & Systems** | Python, Java, C#, C++, C, Rust, Go | 7 |
485 | | **JVM Languages** | Java, Kotlin, Scala | 3 |  
486 | | **Functional** | Elixir, Elm | 2 |
487 | | **Mobile** | Swift, Dart | 2 |
488 | | **Scripting** | Ruby, PHP, Lua | 3 |
489 | | **Data & Config** | SQL, YAML, JSON, TOML | 4 |
490 | | **Markup & Docs** | XML, Markdown | 2 |
491 | | **Additional** | Haskell, OCaml, F# | 3 |
492 | | **Total** | | **25+** |
493 | 
494 | ## Status
495 | 
496 | ✅ **Multi-Language Support** - 25+ programming languages with ast-grep backend  
497 | ✅ **MCP SDK integrated** - Full protocol compliance across all languages  
498 | ✅ **Universal Architecture** - Language-agnostic graph structures and analysis  
499 | ✅ **Server architecture complete** - Enterprise-grade multi-language structure  
500 | ✅ **Core tools implemented** - 8 comprehensive analysis tools working across all languages  
501 | ✅ **Performance optimized** - Multi-language AST caching with intelligent routing  
502 | ✅ **Production ready** - comprehensive error handling, defensive security


--------------------------------------------------------------------------------
/tests/test_rustworkx_performance.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Performance benchmarks for RustworkxCodeGraph functionality.
  4 | 
  5 | This module provides benchmarks to demonstrate the performance improvements
  6 | offered by the rustworkx-backed graph implementation.
  7 | """
  8 | 
  9 | import time
 10 | import pytest
 11 | from typing import List, Dict
 12 | 
 13 | from code_graph_mcp.rustworkx_graph import RustworkxCodeGraph
 14 | from code_graph_mcp.universal_graph import (
 15 |     UniversalNode, UniversalRelationship, UniversalLocation,
 16 |     NodeType, RelationshipType
 17 | )
 18 | 
 19 | 
 20 | class PerformanceBenchmarks:
 21 |     """Performance benchmarks for rustworkx functionality."""
 22 | 
 23 |     def create_large_graph(self, num_nodes: int = 1000, connectivity: float = 0.3) -> RustworkxCodeGraph:
 24 |         """Create a large graph for performance testing."""
 25 |         graph = RustworkxCodeGraph()
 26 | 
 27 |         print(f"Creating graph with {num_nodes} nodes...")
 28 | 
 29 |         # Create nodes
 30 |         start_time = time.time()
 31 |         nodes = []
 32 |         for i in range(num_nodes):
 33 |             node = UniversalNode(
 34 |                 id=f"node_{i}",
 35 |                 name=f"Function_{i}",
 36 |                 node_type=NodeType.FUNCTION if i % 4 != 0 else NodeType.CLASS,
 37 |                 location=UniversalLocation(
 38 |                     file_path=f"/test/file_{i // 50}.py",
 39 |                     start_line=10 + (i % 100),
 40 |                     end_line=20 + (i % 100),
 41 |                     language="Python"
 42 |                 ),
 43 |                 language="Python",
 44 |                 complexity=(i % 20) + 1,
 45 |                 metadata={"module": f"module_{i // 100}"}
 46 |             )
 47 |             nodes.append(node)
 48 |             graph.add_node(node)
 49 | 
 50 |         node_creation_time = time.time() - start_time
 51 |         print(f"Node creation took: {node_creation_time:.3f}s")
 52 | 
 53 |         # Create relationships based on connectivity
 54 |         start_time = time.time()
 55 |         import random
 56 |         relationships = []
 57 |         num_relationships = int(num_nodes * connectivity)
 58 | 
 59 |         for i in range(num_relationships):
 60 |             source_idx = random.randint(0, num_nodes - 1)
 61 |             target_idx = random.randint(0, num_nodes - 1)
 62 | 
 63 |             if source_idx != target_idx:  # Avoid self-loops
 64 |                 rel_type = random.choice([
 65 |                     RelationshipType.CALLS,
 66 |                     RelationshipType.REFERENCES,
 67 |                     RelationshipType.CONTAINS
 68 |                 ])
 69 | 
 70 |                 rel = UniversalRelationship(
 71 |                     id=f"rel_{i}_{source_idx}_{target_idx}",
 72 |                     source_id=f"node_{source_idx}",
 73 |                     target_id=f"node_{target_idx}",
 74 |                     relationship_type=rel_type,
 75 |                     strength=random.uniform(0.1, 1.0)
 76 |                 )
 77 |                 relationships.append(rel)
 78 |                 graph.add_relationship(rel)
 79 | 
 80 |         relationship_creation_time = time.time() - start_time
 81 |         print(f"Relationship creation took: {relationship_creation_time:.3f}s")
 82 |         print(f"Created {len(relationships)} relationships")
 83 | 
 84 |         return graph
 85 | 
 86 |     def benchmark_centrality_algorithms(self, graph: RustworkxCodeGraph) -> Dict[str, float]:
 87 |         """Benchmark centrality calculation algorithms."""
 88 |         print("\\n=== Centrality Algorithm Benchmarks ===")
 89 |         results = {}
 90 | 
 91 |         # Betweenness centrality
 92 |         start_time = time.time()
 93 |         betweenness = graph.calculate_centrality()
 94 |         betweenness_time = time.time() - start_time
 95 |         results['betweenness'] = betweenness_time
 96 |         print(f"Betweenness centrality: {betweenness_time:.3f}s ({len(betweenness)} nodes)")
 97 | 
 98 |         # PageRank
 99 |         start_time = time.time()
100 |         pagerank = graph.calculate_pagerank(alpha=0.85, max_iter=100, tol=1e-6)
101 |         pagerank_time = time.time() - start_time
102 |         results['pagerank'] = pagerank_time
103 |         print(f"PageRank: {pagerank_time:.3f}s ({len(pagerank)} nodes)")
104 | 
105 |         # Closeness centrality
106 |         start_time = time.time()
107 |         closeness = graph.calculate_closeness_centrality()
108 |         closeness_time = time.time() - start_time
109 |         results['closeness'] = closeness_time
110 |         print(f"Closeness centrality: {closeness_time:.3f}s ({len(closeness)} nodes)")
111 | 
112 |         # Eigenvector centrality
113 |         start_time = time.time()
114 |         eigenvector = graph.calculate_eigenvector_centrality(max_iter=100)
115 |         eigenvector_time = time.time() - start_time
116 |         results['eigenvector'] = eigenvector_time
117 |         print(f"Eigenvector centrality: {eigenvector_time:.3f}s ({len(eigenvector)} nodes)")
118 | 
119 |         return results
120 | 
121 |     def benchmark_structural_analysis(self, graph: RustworkxCodeGraph) -> Dict[str, float]:
122 |         """Benchmark structural analysis algorithms."""
123 |         print("\\n=== Structural Analysis Benchmarks ===")
124 |         results = {}
125 | 
126 |         # Cycle detection
127 |         start_time = time.time()
128 |         cycles = graph.detect_cycles()
129 |         cycle_time = time.time() - start_time
130 |         results['cycles'] = cycle_time
131 |         print(f"Cycle detection: {cycle_time:.3f}s ({len(cycles)} cycles found)")
132 | 
133 |         # Strongly connected components
134 |         start_time = time.time()
135 |         components = graph.get_strongly_connected_components()
136 |         scc_time = time.time() - start_time
137 |         results['scc'] = scc_time
138 |         print(f"Strongly connected components: {scc_time:.3f}s ({len(components)} components)")
139 | 
140 |         # Articulation points
141 |         start_time = time.time()
142 |         articulation_points = graph.find_articulation_points()
143 |         articulation_time = time.time() - start_time
144 |         results['articulation'] = articulation_time
145 |         print(f"Articulation points: {articulation_time:.3f}s ({len(articulation_points)} points)")
146 | 
147 |         # Bridges
148 |         start_time = time.time()
149 |         bridges = graph.find_bridges()
150 |         bridges_time = time.time() - start_time
151 |         results['bridges'] = bridges_time
152 |         print(f"Bridge finding: {bridges_time:.3f}s ({len(bridges)} bridges)")
153 | 
154 |         # DAG check
155 |         start_time = time.time()
156 |         is_dag = graph.is_directed_acyclic()
157 |         dag_time = time.time() - start_time
158 |         results['dag'] = dag_time
159 |         print(f"DAG check: {dag_time:.3f}s (Result: {is_dag})")
160 | 
161 |         return results
162 | 
163 |     def benchmark_path_algorithms(self, graph: RustworkxCodeGraph, sample_nodes: List[str]) -> Dict[str, float]:
164 |         """Benchmark shortest path algorithms."""
165 |         print("\\n=== Path Algorithm Benchmarks ===")
166 |         results = {}
167 | 
168 |         if len(sample_nodes) < 2:
169 |             print("Not enough nodes for path benchmarks")
170 |             return results
171 | 
172 |         source, target = sample_nodes[0], sample_nodes[1]
173 | 
174 |         # Shortest path
175 |         start_time = time.time()
176 |         shortest_path = graph.find_shortest_path(source, target)
177 |         shortest_path_time = time.time() - start_time
178 |         results['shortest_path'] = shortest_path_time
179 |         print(f"Shortest path: {shortest_path_time:.3f}s (Length: {len(shortest_path)})")
180 | 
181 |         # All paths (limited)
182 |         start_time = time.time()
183 |         all_paths = graph.find_all_paths(source, target, max_length=5)
184 |         all_paths_time = time.time() - start_time
185 |         results['all_paths'] = all_paths_time
186 |         print(f"All paths (max 5): {all_paths_time:.3f}s ({len(all_paths)} paths)")
187 | 
188 |         # Distance matrix (Floyd-Warshall) - only for smaller graphs
189 |         if len(graph.nodes) <= 200:  # Limit to avoid excessive computation
190 |             start_time = time.time()
191 |             distance_matrix = graph.calculate_graph_distance_matrix()
192 |             distance_matrix_time = time.time() - start_time
193 |             results['distance_matrix'] = distance_matrix_time
194 |             total_distances = sum(len(targets) for targets in distance_matrix.values())
195 |             print(f"Distance matrix: {distance_matrix_time:.3f}s ({total_distances} distances)")
196 | 
197 |         # Bellman-Ford path lengths
198 |         start_time = time.time()
199 |         bellman_ford = graph.calculate_bellman_ford_path_lengths()
200 |         bellman_ford_time = time.time() - start_time
201 |         results['bellman_ford'] = bellman_ford_time
202 |         total_bf_distances = sum(len(targets) for targets in bellman_ford.values())
203 |         print(f"Bellman-Ford paths: {bellman_ford_time:.3f}s ({total_bf_distances} distances)")
204 | 
205 |         return results
206 | 
207 |     def benchmark_traversal_algorithms(self, graph: RustworkxCodeGraph, sample_nodes: List[str]) -> Dict[str, float]:
208 |         """Benchmark graph traversal algorithms."""
209 |         print("\\n=== Traversal Algorithm Benchmarks ===")
210 |         results = {}
211 | 
212 |         if not sample_nodes:
213 |             print("No nodes for traversal benchmarks")
214 |             return results
215 | 
216 |         source = sample_nodes[0]
217 | 
218 |         # DFS
219 |         start_time = time.time()
220 |         dfs_nodes = graph.depth_first_search(source)
221 |         dfs_time = time.time() - start_time
222 |         results['dfs'] = dfs_time
223 |         print(f"DFS traversal: {dfs_time:.3f}s ({len(dfs_nodes)} nodes visited)")
224 | 
225 |         # BFS
226 |         start_time = time.time()
227 |         bfs_nodes = graph.breadth_first_search(source)
228 |         bfs_time = time.time() - start_time
229 |         results['bfs'] = bfs_time
230 |         print(f"BFS traversal: {bfs_time:.3f}s ({len(bfs_nodes)} nodes visited)")
231 | 
232 |         # Node layers
233 |         start_time = time.time()
234 |         layers = graph.find_node_layers(source)
235 |         layers_time = time.time() - start_time
236 |         results['layers'] = layers_time
237 |         total_nodes_in_layers = sum(len(nodes) for nodes in layers.values())
238 |         print(f"Node layers: {layers_time:.3f}s ({len(layers)} layers, {total_nodes_in_layers} nodes)")
239 | 
240 |         return results
241 | 
242 |     def benchmark_serialization(self, graph: RustworkxCodeGraph) -> Dict[str, float]:
243 |         """Benchmark serialization methods."""
244 |         print("\\n=== Serialization Benchmarks ===")
245 |         results = {}
246 | 
247 |         # JSON serialization
248 |         start_time = time.time()
249 |         json_data = graph.to_json()
250 |         json_time = time.time() - start_time
251 |         results['json'] = json_time
252 |         print(f"JSON serialization: {json_time:.3f}s ({len(json_data)} characters)")
253 | 
254 |         # DOT serialization
255 |         start_time = time.time()
256 |         dot_data = graph.to_dot()
257 |         dot_time = time.time() - start_time
258 |         results['dot'] = dot_time
259 |         print(f"DOT serialization: {dot_time:.3f}s ({len(dot_data)} characters)")
260 | 
261 |         # Statistics generation
262 |         start_time = time.time()
263 |         graph.get_statistics()
264 |         stats_time = time.time() - start_time
265 |         results['statistics'] = stats_time
266 |         print(f"Statistics generation: {stats_time:.3f}s")
267 | 
268 |         return results
269 | 
270 |     def run_comprehensive_benchmark(self, num_nodes: int = 500):
271 |         """Run comprehensive performance benchmarks."""
272 |         print(f"\\n{'='*60}")
273 |         print("RUSTWORKX CODE GRAPH PERFORMANCE BENCHMARK")
274 |         print(f"Graph size: {num_nodes} nodes")
275 |         print(f"{'='*60}")
276 | 
277 |         # Create test graph
278 |         total_start_time = time.time()
279 |         graph = self.create_large_graph(num_nodes, connectivity=0.3)
280 | 
281 |         # Get sample nodes for path testing
282 |         sample_nodes = list(graph.nodes.keys())[:10]
283 | 
284 |         # Run benchmarks
285 |         benchmark_results = {}
286 |         benchmark_results['centrality'] = self.benchmark_centrality_algorithms(graph)
287 |         benchmark_results['structural'] = self.benchmark_structural_analysis(graph)
288 |         benchmark_results['paths'] = self.benchmark_path_algorithms(graph, sample_nodes)
289 |         benchmark_results['traversal'] = self.benchmark_traversal_algorithms(graph, sample_nodes)
290 |         benchmark_results['serialization'] = self.benchmark_serialization(graph)
291 | 
292 |         total_time = time.time() - total_start_time
293 | 
294 |         # Summary
295 |         print("\\n" + "="*60)
296 |         print("BENCHMARK SUMMARY")
297 |         print("="*60)
298 |         print(f"Total benchmark time: {total_time:.3f}s")
299 |         print("Graph statistics:")
300 |         stats = graph.get_statistics()
301 |         print(f"  - Nodes: {stats['total_nodes']}")
302 |         print(f"  - Relationships: {stats['total_relationships']}")
303 |         print(f"  - Density: {stats['density']:.4f}")
304 |         print(f"  - Average degree: {stats['average_degree']:.2f}")
305 | 
306 |         # Performance highlights
307 |         print("\\nPerformance highlights:")
308 |         if 'pagerank' in benchmark_results['centrality']:
309 |             pagerank_time = benchmark_results['centrality']['pagerank']
310 |             nodes_per_sec = stats['total_nodes'] / pagerank_time if pagerank_time > 0 else 0
311 |             print(f"  - PageRank: {nodes_per_sec:.0f} nodes/second")
312 | 
313 |         if 'betweenness' in benchmark_results['centrality']:
314 |             betweenness_time = benchmark_results['centrality']['betweenness']
315 |             nodes_per_sec = stats['total_nodes'] / betweenness_time if betweenness_time > 0 else 0
316 |             print(f"  - Betweenness centrality: {nodes_per_sec:.0f} nodes/second")
317 | 
318 |         if 'cycles' in benchmark_results['structural']:
319 |             cycles_time = benchmark_results['structural']['cycles']
320 |             edges_per_sec = stats['total_relationships'] / cycles_time if cycles_time > 0 else 0
321 |             print(f"  - Cycle detection: {edges_per_sec:.0f} edges/second")
322 | 
323 |         return benchmark_results
324 | 
325 | 
326 | @pytest.mark.performance
327 | class TestPerformanceBenchmarks:
328 |     """Test class for performance benchmarks."""
329 | 
330 |     def test_small_graph_performance(self):
331 |         """Test performance with a small graph (fast test)."""
332 |         benchmarks = PerformanceBenchmarks()
333 |         results = benchmarks.run_comprehensive_benchmark(num_nodes=100)
334 | 
335 |         # Basic assertions that operations completed
336 |         assert 'centrality' in results
337 |         assert 'structural' in results
338 |         assert 'serialization' in results
339 | 
340 |     @pytest.mark.slow
341 |     def test_medium_graph_performance(self):
342 |         """Test performance with a medium graph (slower test)."""
343 |         benchmarks = PerformanceBenchmarks()
344 |         results = benchmarks.run_comprehensive_benchmark(num_nodes=500)
345 | 
346 |         # Verify all benchmark categories completed
347 |         expected_categories = ['centrality', 'structural', 'paths', 'traversal', 'serialization']
348 |         for category in expected_categories:
349 |             assert category in results
350 | 
351 |     @pytest.mark.slow
352 |     def test_large_graph_performance(self):
353 |         """Test performance with a large graph (very slow test)."""
354 |         benchmarks = PerformanceBenchmarks()
355 |         results = benchmarks.run_comprehensive_benchmark(num_nodes=1000)
356 | 
357 |         # Verify operations scale reasonably
358 |         assert 'centrality' in results
359 | 
360 |         # PageRank should complete in reasonable time even for large graphs
361 |         if 'pagerank' in results['centrality']:
362 |             assert results['centrality']['pagerank'] < 10.0  # Should complete in under 10 seconds
363 | 
364 |     def test_connectivity_analysis_performance(self):
365 |         """Test performance of connectivity analysis features."""
366 |         benchmarks = PerformanceBenchmarks()
367 |         graph = benchmarks.create_large_graph(num_nodes=200, connectivity=0.4)
368 | 
369 |         # Test comprehensive connectivity analysis
370 |         start_time = time.time()
371 |         connectivity = graph.analyze_graph_connectivity()
372 |         analysis_time = time.time() - start_time
373 | 
374 |         print(f"\\nConnectivity analysis took: {analysis_time:.3f}s")
375 | 
376 |         # Verify analysis completed and has expected structure
377 |         assert 'basic_metrics' in connectivity
378 |         assert 'connectivity_metrics' in connectivity
379 |         assert 'distance_metrics' in connectivity
380 | 
381 |         # Should complete in reasonable time
382 |         assert analysis_time < 30.0  # Should complete in under 30 seconds
383 | 
384 | 
385 | if __name__ == "__main__":
386 |     # Run benchmarks directly
387 |     benchmarks = PerformanceBenchmarks()
388 | 
389 |     print("Running performance benchmarks...")
390 |     print("Note: This will take several minutes to complete.")
391 | 
392 |     # Run different sized benchmarks
393 |     for size in [100, 200, 500]:
394 |         print(f"\\n{'='*80}")
395 |         print(f"RUNNING BENCHMARK FOR {size} NODES")
396 |         print(f"{'='*80}")
397 |         benchmarks.run_comprehensive_benchmark(num_nodes=size)
398 |         print("\\n" + "="*80)
399 | 


--------------------------------------------------------------------------------
/tests/test_mcp_rustworkx_integration.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Integration tests for MCP server with rustworkx backend.
  4 | 
  5 | Tests the complete integration of rustworkx functionality with the MCP server,
  6 | ensuring that all tools work correctly with the high-performance graph backend.
  7 | """
  8 | 
  9 | import json
 10 | import pytest
 11 | import time
 12 | from unittest.mock import patch
 13 | 
 14 | from code_graph_mcp.server import UniversalAnalysisEngine
 15 | from code_graph_mcp.rustworkx_graph import RustworkxCodeGraph
 16 | from code_graph_mcp.universal_graph import (
 17 |     UniversalNode, UniversalRelationship, UniversalLocation,
 18 |     NodeType, RelationshipType
 19 | )
 20 | 
 21 | 
 22 | class TestMCPRustworkxIntegration:
 23 |     """Integration tests for MCP server with rustworkx backend."""
 24 | 
 25 |     @pytest.fixture
 26 |     def mock_project_root(self, tmp_path):
 27 |         """Create a mock project root with sample Python files."""
 28 |         # Create sample Python files
 29 |         main_file = tmp_path / "main.py"
 30 |         main_file.write_text('''
 31 | def main():
 32 |     """Main function."""
 33 |     print("Hello, world!")
 34 |     helper_function()
 35 |     return 42
 36 | 
 37 | def helper_function():
 38 |     """Helper function."""
 39 |     data = process_data([1, 2, 3])
 40 |     return data
 41 | 
 42 | def process_data(items):
 43 |     """Process a list of items."""
 44 |     return [x * 2 for x in items]
 45 | 
 46 | class DataProcessor:
 47 |     """Class for processing data."""
 48 | 
 49 |     def __init__(self):
 50 |         self.data = []
 51 | 
 52 |     def add_data(self, item):
 53 |         """Add data item."""
 54 |         self.data.append(item)
 55 | 
 56 |     def process(self):
 57 |         """Process all data."""
 58 |         return process_data(self.data)
 59 | ''')
 60 | 
 61 |         utils_file = tmp_path / "utils.py"
 62 |         utils_file.write_text('''
 63 | import json
 64 | from typing import List, Dict, Any
 65 | 
 66 | def load_config(filename: str) -> Dict[str, Any]:
 67 |     """Load configuration from JSON file."""
 68 |     with open(filename, 'r') as f:
 69 |         return json.load(f)
 70 | 
 71 | def save_results(data: List[Any], filename: str) -> None:
 72 |     """Save results to JSON file."""
 73 |     with open(filename, 'w') as f:
 74 |         json.dump(data, f, indent=2)
 75 | 
 76 | class ConfigManager:
 77 |     """Manages application configuration."""
 78 | 
 79 |     def __init__(self, config_file: str):
 80 |         self.config_file = config_file
 81 |         self.config = load_config(config_file)
 82 | 
 83 |     def get(self, key: str, default=None):
 84 |         """Get configuration value."""
 85 |         return self.config.get(key, default)
 86 | 
 87 |     def update(self, key: str, value: Any):
 88 |         """Update configuration value."""
 89 |         self.config[key] = value
 90 |         save_results(self.config, self.config_file)
 91 | ''')
 92 | 
 93 |         return tmp_path
 94 | 
 95 |     @pytest.fixture
 96 |     def analysis_engine(self, mock_project_root):
 97 |         """Create analysis engine with mock project."""
 98 |         with patch('code_graph_mcp.server.UniversalAnalysisEngine._ensure_analyzed'):
 99 |             engine = UniversalAnalysisEngine(mock_project_root)
100 | 
101 |             # Create a sample graph directly for testing
102 |             graph = RustworkxCodeGraph()
103 | 
104 |             # Add sample nodes
105 |             nodes = [
106 |                 UniversalNode(
107 |                     id="file:main.py",
108 |                     name="main.py",
109 |                     node_type=NodeType.MODULE,
110 |                     location=UniversalLocation(
111 |                         file_path=str(mock_project_root / "main.py"),
112 |                         start_line=1,
113 |                         end_line=30,
114 |                         language="Python"
115 |                     ),
116 |                     language="Python",
117 |                     line_count=30
118 |                 ),
119 |                 UniversalNode(
120 |                     id="function:main.py:main:2",
121 |                     name="main",
122 |                     node_type=NodeType.FUNCTION,
123 |                     location=UniversalLocation(
124 |                         file_path=str(mock_project_root / "main.py"),
125 |                         start_line=2,
126 |                         end_line=6,
127 |                         language="Python"
128 |                     ),
129 |                     language="Python",
130 |                     complexity=3,
131 |                     docstring="Main function."
132 |                 ),
133 |                 UniversalNode(
134 |                     id="function:main.py:helper_function:8",
135 |                     name="helper_function",
136 |                     node_type=NodeType.FUNCTION,
137 |                     location=UniversalLocation(
138 |                         file_path=str(mock_project_root / "main.py"),
139 |                         start_line=8,
140 |                         end_line=11,
141 |                         language="Python"
142 |                     ),
143 |                     language="Python",
144 |                     complexity=2,
145 |                     docstring="Helper function."
146 |                 ),
147 |                 UniversalNode(
148 |                     id="function:main.py:process_data:13",
149 |                     name="process_data",
150 |                     node_type=NodeType.FUNCTION,
151 |                     location=UniversalLocation(
152 |                         file_path=str(mock_project_root / "main.py"),
153 |                         start_line=13,
154 |                         end_line=15,
155 |                         language="Python"
156 |                     ),
157 |                     language="Python",
158 |                     complexity=1,
159 |                     docstring="Process a list of items."
160 |                 ),
161 |                 UniversalNode(
162 |                     id="class:main.py:DataProcessor:17",
163 |                     name="DataProcessor",
164 |                     node_type=NodeType.CLASS,
165 |                     location=UniversalLocation(
166 |                         file_path=str(mock_project_root / "main.py"),
167 |                         start_line=17,
168 |                         end_line=30,
169 |                         language="Python"
170 |                     ),
171 |                     language="Python",
172 |                     docstring="Class for processing data."
173 |                 )
174 |             ]
175 | 
176 |             for node in nodes:
177 |                 graph.add_node(node)
178 | 
179 |             # Add sample relationships
180 |             relationships = [
181 |                 UniversalRelationship(
182 |                     id="contains:file:main:function:main",
183 |                     source_id="file:main.py",
184 |                     target_id="function:main.py:main:2",
185 |                     relationship_type=RelationshipType.CONTAINS
186 |                 ),
187 |                 UniversalRelationship(
188 |                     id="contains:file:main:function:helper",
189 |                     source_id="file:main.py",
190 |                     target_id="function:main.py:helper_function:8",
191 |                     relationship_type=RelationshipType.CONTAINS
192 |                 ),
193 |                 UniversalRelationship(
194 |                     id="calls:main:helper",
195 |                     source_id="function:main.py:main:2",
196 |                     target_id="function:main.py:helper_function:8",
197 |                     relationship_type=RelationshipType.CALLS,
198 |                     metadata={"call_line": 5}
199 |                 ),
200 |                 UniversalRelationship(
201 |                     id="calls:helper:process_data",
202 |                     source_id="function:main.py:helper_function:8",
203 |                     target_id="function:main.py:process_data:13",
204 |                     relationship_type=RelationshipType.CALLS,
205 |                     metadata={"call_line": 10}
206 |                 )
207 |             ]
208 | 
209 |             for rel in relationships:
210 |                 graph.add_relationship(rel)
211 | 
212 |             # Replace the engine's graph with our test graph
213 |             engine.graph = graph
214 |             engine._is_analyzed = True
215 | 
216 |             return engine
217 | 
218 |     def test_project_statistics_with_rustworkx(self, analysis_engine):
219 |         """Test project statistics generation with rustworkx backend."""
220 |         stats = analysis_engine.get_project_stats()
221 | 
222 |         # Verify basic statistics
223 |         assert stats["total_nodes"] > 0
224 |         assert stats["total_relationships"] > 0
225 |         assert "node_types" in stats
226 |         assert "last_analysis" in stats
227 | 
228 |         # Verify node types are present
229 |         node_types = stats["node_types"]
230 |         assert "module" in node_types
231 |         assert "function" in node_types
232 |         assert "class" in node_types
233 | 
234 |     def test_find_definition_with_rustworkx(self, analysis_engine):
235 |         """Test symbol definition finding with rustworkx backend."""
236 |         # Test finding a function
237 |         main_defs = analysis_engine.find_symbol_definition("main")
238 |         assert len(main_defs) > 0
239 | 
240 |         main_def = main_defs[0]
241 |         assert main_def["name"] == "main"
242 |         assert main_def["type"] == "function"
243 |         assert main_def["complexity"] == 3
244 |         assert "Main function" in main_def["documentation"]
245 | 
246 |         # Test finding a class
247 |         class_defs = analysis_engine.find_symbol_definition("DataProcessor")
248 |         assert len(class_defs) > 0
249 | 
250 |         class_def = class_defs[0]
251 |         assert class_def["name"] == "DataProcessor"
252 |         assert class_def["type"] == "class"
253 | 
254 |     def test_find_references_with_rustworkx(self, analysis_engine):
255 |         """Test symbol reference finding with rustworkx backend."""
256 |         # This would typically find references to symbols
257 |         # For now, test that the method works without errors
258 |         references = analysis_engine.find_symbol_references("process_data")
259 |         assert isinstance(references, list)
260 | 
261 |     def test_find_callers_with_rustworkx(self, analysis_engine):
262 |         """Test finding function callers with rustworkx backend."""
263 |         # Test finding callers of helper_function
264 |         callers = analysis_engine.find_function_callers("helper_function")
265 | 
266 |         # Should find that main() calls helper_function()
267 |         assert len(callers) > 0
268 |         caller = callers[0]
269 |         assert caller["caller"] == "main"
270 |         assert caller["target_function"] == "helper_function"
271 |         assert caller["caller_type"] == "function"
272 | 
273 |     def test_find_callees_with_rustworkx(self, analysis_engine):
274 |         """Test finding function callees with rustworkx backend."""
275 |         # Test finding functions called by helper_function
276 |         callees = analysis_engine.find_function_callees("helper_function")
277 | 
278 |         # Should find that helper_function() calls process_data()
279 |         assert len(callees) > 0
280 |         callee = callees[0]
281 |         assert callee["callee"] == "process_data"
282 |         assert callee["callee_type"] == "function"
283 | 
284 |     def test_complexity_analysis_with_rustworkx(self, analysis_engine):
285 |         """Test complexity analysis with rustworkx backend."""
286 |         # Test with low threshold to catch all functions
287 |         complex_functions = analysis_engine.analyze_complexity(threshold=1)
288 | 
289 |         assert len(complex_functions) > 0
290 | 
291 |         # Check that functions have expected complexity data
292 |         for func in complex_functions:
293 |             assert "name" in func
294 |             assert "complexity" in func
295 |             assert "risk_level" in func
296 |             assert "file" in func
297 |             assert "line" in func
298 |             assert func["complexity"] >= 1
299 | 
300 |     def test_dependency_analysis_with_rustworkx(self, analysis_engine):
301 |         """Test dependency analysis with rustworkx enhanced features."""
302 |         deps = analysis_engine.get_dependency_graph()
303 | 
304 |         # Verify basic structure
305 |         assert "total_files" in deps
306 |         assert "total_dependencies" in deps
307 |         assert "dependencies" in deps
308 | 
309 |         # Verify rustworkx enhancements
310 |         assert "circular_dependencies" in deps
311 |         assert "is_directed_acyclic" in deps
312 |         assert "strongly_connected_components" in deps
313 |         assert "graph_density" in deps
314 | 
315 |         # Test that rustworkx analysis completed
316 |         assert isinstance(deps["is_directed_acyclic"], bool)
317 |         assert isinstance(deps["circular_dependencies"], list)
318 |         assert isinstance(deps["graph_density"], (int, float))
319 | 
320 |     def test_code_insights_with_rustworkx(self, analysis_engine):
321 |         """Test advanced code insights with rustworkx analytics."""
322 |         insights = analysis_engine.get_code_insights()
323 | 
324 |         # Verify comprehensive analytics structure
325 |         assert "centrality_analysis" in insights
326 |         assert "structural_analysis" in insights
327 |         assert "graph_statistics" in insights
328 |         assert "topology_analysis" in insights
329 | 
330 |         # Test centrality analysis
331 |         centrality = insights["centrality_analysis"]
332 |         assert "betweenness_centrality" in centrality
333 |         assert "pagerank" in centrality
334 |         assert "closeness_centrality" in centrality
335 |         assert "eigenvector_centrality" in centrality
336 | 
337 |         # Test structural analysis
338 |         structural = insights["structural_analysis"]
339 |         assert "articulation_points" in structural
340 |         assert "bridges" in structural
341 | 
342 |         # Test topology analysis
343 |         topology = insights["topology_analysis"]
344 |         assert "is_directed_acyclic" in topology
345 |         assert "num_cycles" in topology
346 |         assert "strongly_connected_components" in topology
347 | 
348 |         # Verify that centrality calculations return results
349 |         if centrality["betweenness_centrality"]:
350 |             node_info = centrality["betweenness_centrality"][0]
351 |             assert "node_id" in node_info
352 |             assert "score" in node_info
353 |             assert "node_name" in node_info
354 |             assert "node_type" in node_info
355 | 
356 |     def test_graph_performance_metrics(self, analysis_engine):
357 |         """Test performance characteristics of rustworkx backend."""
358 |         import time
359 | 
360 |         # Test that basic operations are fast
361 |         start_time = time.time()
362 |         stats = analysis_engine.get_project_stats()
363 |         stats_time = time.time() - start_time
364 | 
365 |         start_time = time.time()
366 |         insights = analysis_engine.get_code_insights()
367 |         insights_time = time.time() - start_time
368 | 
369 |         # Operations should complete quickly for small graphs
370 |         assert stats_time < 1.0  # Less than 1 second
371 |         assert insights_time < 5.0  # Less than 5 seconds
372 | 
373 |         # Verify we got meaningful results
374 |         assert stats["total_nodes"] > 0
375 |         assert len(insights["centrality_analysis"]["pagerank"]) > 0
376 | 
377 |     def test_rustworkx_serialization_integration(self, analysis_engine):
378 |         """Test that rustworkx graph serialization works with MCP."""
379 |         # Get the underlying rustworkx graph
380 |         rustworkx_graph = analysis_engine.graph
381 | 
382 |         # Test JSON serialization
383 |         json_output = rustworkx_graph.to_json()
384 |         assert isinstance(json_output, str)
385 |         assert len(json_output) > 100  # Should have substantial content
386 | 
387 |         # Verify it's valid JSON
388 |         json_data = json.loads(json_output)
389 |         assert isinstance(json_data, dict)
390 | 
391 |         # Test DOT serialization
392 |         dot_output = rustworkx_graph.to_dot()
393 |         assert isinstance(dot_output, str)
394 |         assert "digraph" in dot_output.lower()
395 | 
396 |         # Test statistics
397 |         graph_stats = rustworkx_graph.get_statistics()
398 |         assert "total_nodes" in graph_stats
399 |         assert "total_relationships" in graph_stats
400 |         assert graph_stats["total_nodes"] > 0
401 | 
402 |     def test_error_handling_integration(self, analysis_engine):
403 |         """Test error handling in MCP integration with rustworkx."""
404 |         # Test non-existent symbol
405 |         no_defs = analysis_engine.find_symbol_definition("nonexistent_symbol")
406 |         assert len(no_defs) == 0
407 | 
408 |         no_callers = analysis_engine.find_function_callers("nonexistent_function")
409 |         assert len(no_callers) == 0
410 | 
411 |         no_callees = analysis_engine.find_function_callees("nonexistent_function")
412 |         assert len(no_callees) == 0
413 | 
414 |         # Operations should not crash and return empty results gracefully
415 |         assert isinstance(no_defs, list)
416 |         assert isinstance(no_callers, list)
417 |         assert isinstance(no_callees, list)
418 | 
419 |     def test_large_graph_integration(self):
420 |         """Test integration with a larger graph to verify scalability."""
421 |         # Create a larger synthetic graph
422 |         graph = RustworkxCodeGraph()
423 | 
424 |         # Add 100 nodes
425 |         for i in range(100):
426 |             node = UniversalNode(
427 |                 id=f"node_{i}",
428 |                 name=f"Function_{i}",
429 |                 node_type=NodeType.FUNCTION,
430 |                 location=UniversalLocation(
431 |                     file_path=f"/test/file_{i//10}.py",
432 |                     start_line=10 + i,
433 |                     end_line=20 + i,
434 |                     language="Python"
435 |                 ),
436 |                 language="Python",
437 |                 complexity=(i % 10) + 1
438 |             )
439 |             graph.add_node(node)
440 | 
441 |         # Add relationships
442 |         for i in range(50):
443 |             rel = UniversalRelationship(
444 |                 id=f"calls_{i}_{i+1}",
445 |                 source_id=f"node_{i}",
446 |                 target_id=f"node_{i+1}",
447 |                 relationship_type=RelationshipType.CALLS
448 |             )
449 |             graph.add_relationship(rel)
450 | 
451 |         # Test that rustworkx operations scale well
452 |         start_time = time.time()
453 | 
454 |         centrality = graph.calculate_centrality()
455 |         pagerank = graph.calculate_pagerank()
456 |         stats = graph.get_statistics()
457 | 
458 |         total_time = time.time() - start_time
459 | 
460 |         # Should complete quickly even with 100 nodes
461 |         assert total_time < 10.0
462 |         assert len(centrality) == 100
463 |         assert len(pagerank) == 100
464 |         assert stats["total_nodes"] == 100
465 | 
466 |     @pytest.mark.asyncio
467 |     async def test_mcp_tool_handlers_with_rustworkx(self, analysis_engine):
468 |         """Test that MCP tool handlers work correctly with rustworkx backend."""
469 |         from code_graph_mcp.server import (
470 |             handle_analyze_codebase,
471 |             handle_find_definition,
472 |             handle_find_callers,
473 |             handle_complexity_analysis,
474 |             handle_project_statistics
475 |         )
476 | 
477 |         # Test analyze_codebase handler
478 |         result = await handle_analyze_codebase(analysis_engine, {})
479 |         assert len(result) == 1
480 |         assert result[0].type == "text"
481 |         assert "Analysis Complete" in result[0].text
482 | 
483 |         # Test find_definition handler
484 |         result = await handle_find_definition(analysis_engine, {"symbol": "main"})
485 |         assert len(result) == 1
486 |         assert "Definition Analysis" in result[0].text
487 | 
488 |         # Test find_callers handler
489 |         result = await handle_find_callers(analysis_engine, {"function": "helper_function"})
490 |         assert len(result) == 1
491 |         assert "Caller Analysis" in result[0].text
492 | 
493 |         # Test complexity_analysis handler
494 |         result = await handle_complexity_analysis(analysis_engine, {"threshold": 1})
495 |         assert len(result) == 1
496 |         assert "Complexity Analysis" in result[0].text
497 | 
498 |         # Test project_statistics handler with rustworkx enhancements
499 |         result = await handle_project_statistics(analysis_engine, {})
500 |         assert len(result) == 1
501 |         text_content = result[0].text
502 |         assert "Advanced Project Statistics" in text_content
503 |         assert "Powered by rustworkx" in text_content
504 |         assert "Graph Analytics" in text_content
505 |         assert "Most Central Code Elements" in text_content
506 | 
507 | 
508 | if __name__ == "__main__":
509 |     pytest.main([__file__, "-v"])
510 | 


--------------------------------------------------------------------------------
/tests/test_rustworkx_graph.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Comprehensive test suite for RustworkxCodeGraph functionality.
  4 | 
  5 | Tests all major features including:
  6 | - Graph construction and manipulation
  7 | - Advanced analytics (centrality, PageRank, etc.)
  8 | - Serialization (JSON, DOT, GraphML)
  9 | - Traversal algorithms (DFS, BFS)
 10 | - Connectivity analysis
 11 | - Error handling and edge cases
 12 | """
 13 | 
 14 | import json
 15 | import pytest
 16 | import tempfile
 17 | import os
 18 | from unittest.mock import patch
 19 | 
 20 | # Import our code
 21 | from code_graph_mcp.rustworkx_graph import RustworkxCodeGraph
 22 | from code_graph_mcp.universal_graph import (
 23 |     UniversalNode, UniversalRelationship, UniversalLocation,
 24 |     NodeType, RelationshipType
 25 | )
 26 | 
 27 | 
 28 | class TestRustworkxCodeGraph:
 29 |     """Test suite for RustworkxCodeGraph functionality."""
 30 | 
 31 |     @pytest.fixture
 32 |     def sample_graph(self):
 33 |         """Create a sample graph with nodes and relationships for testing."""
 34 |         graph = RustworkxCodeGraph()
 35 | 
 36 |         # Create sample nodes
 37 |         nodes = [
 38 |             UniversalNode(
 39 |                 id="file:main.py",
 40 |                 name="main.py",
 41 |                 node_type=NodeType.MODULE,
 42 |                 location=UniversalLocation(
 43 |                     file_path="/test/main.py",
 44 |                     start_line=1,
 45 |                     end_line=50,
 46 |                     language="Python"
 47 |                 ),
 48 |                 language="Python",
 49 |                 content="# Main module",
 50 |                 line_count=50
 51 |             ),
 52 |             UniversalNode(
 53 |                 id="function:main.py:main:10",
 54 |                 name="main",
 55 |                 node_type=NodeType.FUNCTION,
 56 |                 location=UniversalLocation(
 57 |                     file_path="/test/main.py",
 58 |                     start_line=10,
 59 |                     end_line=20,
 60 |                     language="Python"
 61 |                 ),
 62 |                 language="Python",
 63 |                 complexity=5,
 64 |                 metadata={"docstring": "Main function"}
 65 |             ),
 66 |             UniversalNode(
 67 |                 id="function:main.py:helper:25",
 68 |                 name="helper",
 69 |                 node_type=NodeType.FUNCTION,
 70 |                 location=UniversalLocation(
 71 |                     file_path="/test/main.py",
 72 |                     start_line=25,
 73 |                     end_line=35,
 74 |                     language="Python"
 75 |                 ),
 76 |                 language="Python",
 77 |                 complexity=3
 78 |             ),
 79 |             UniversalNode(
 80 |                 id="class:main.py:TestClass:40",
 81 |                 name="TestClass",
 82 |                 node_type=NodeType.CLASS,
 83 |                 location=UniversalLocation(
 84 |                     file_path="/test/main.py",
 85 |                     start_line=40,
 86 |                     end_line=50,
 87 |                     language="Python"
 88 |                 ),
 89 |                 language="Python"
 90 |             )
 91 |         ]
 92 | 
 93 |         # Add nodes to graph
 94 |         for node in nodes:
 95 |             graph.add_node(node)
 96 | 
 97 |         # Create sample relationships
 98 |         relationships = [
 99 |             UniversalRelationship(
100 |                 id="contains:file:main:function:main",
101 |                 source_id="file:main.py",
102 |                 target_id="function:main.py:main:10",
103 |                 relationship_type=RelationshipType.CONTAINS
104 |             ),
105 |             UniversalRelationship(
106 |                 id="contains:file:main:function:helper",
107 |                 source_id="file:main.py",
108 |                 target_id="function:main.py:helper:25",
109 |                 relationship_type=RelationshipType.CONTAINS
110 |             ),
111 |             UniversalRelationship(
112 |                 id="contains:file:main:class:TestClass",
113 |                 source_id="file:main.py",
114 |                 target_id="class:main.py:TestClass:40",
115 |                 relationship_type=RelationshipType.CONTAINS
116 |             ),
117 |             UniversalRelationship(
118 |                 id="calls:main:helper",
119 |                 source_id="function:main.py:main:10",
120 |                 target_id="function:main.py:helper:25",
121 |                 relationship_type=RelationshipType.CALLS,
122 |                 metadata={"call_line": 15}
123 |             )
124 |         ]
125 | 
126 |         # Add relationships to graph
127 |         for rel in relationships:
128 |             graph.add_relationship(rel)
129 | 
130 |         return graph
131 | 
132 |     def test_graph_initialization(self):
133 |         """Test basic graph initialization."""
134 |         graph = RustworkxCodeGraph()
135 | 
136 |         assert len(graph.nodes) == 0
137 |         assert len(graph.relationships) == 0
138 |         assert len(graph.graph) == 0  # rustworkx graph should be empty
139 |         assert len(graph._processed_files) == 0
140 | 
141 |     def test_add_node(self, sample_graph):
142 |         """Test adding nodes to the graph."""
143 |         assert len(sample_graph.nodes) == 4
144 |         assert len(sample_graph.graph) == 4  # rustworkx graph should have 4 nodes
145 | 
146 |         # Verify nodes have rustworkx indices
147 |         for node in sample_graph.nodes.values():
148 |             assert hasattr(node, '_rustworkx_index')
149 | 
150 |         # Test node retrieval
151 |         main_node = sample_graph.get_node("function:main.py:main:10")
152 |         assert main_node is not None
153 |         assert main_node.name == "main"
154 |         assert main_node.complexity == 5
155 | 
156 |     def test_add_relationship(self, sample_graph):
157 |         """Test adding relationships to the graph."""
158 |         assert len(sample_graph.relationships) == 4
159 | 
160 |         # Verify relationships have rustworkx edge indices
161 |         for rel in sample_graph.relationships.values():
162 |             assert hasattr(rel, '_rustworkx_edge_index')
163 | 
164 |         # Test relationship retrieval
165 |         calls_rel = sample_graph.relationships["calls:main:helper"]
166 |         assert calls_rel.relationship_type == RelationshipType.CALLS
167 |         assert calls_rel.metadata["call_line"] == 15
168 | 
169 |     def test_find_nodes_by_name(self, sample_graph):
170 |         """Test finding nodes by name."""
171 |         # Exact match
172 |         main_nodes = sample_graph.find_nodes_by_name("main", exact_match=True)
173 |         assert len(main_nodes) == 1
174 |         assert main_nodes[0].name == "main"
175 | 
176 |         # Fuzzy match
177 |         main_fuzzy = sample_graph.find_nodes_by_name("mai", exact_match=False)
178 |         assert len(main_fuzzy) >= 1
179 | 
180 |         # Non-existent node
181 |         nonexistent = sample_graph.find_nodes_by_name("nonexistent", exact_match=True)
182 |         assert len(nonexistent) == 0
183 | 
184 |     def test_get_nodes_by_type(self, sample_graph):
185 |         """Test filtering nodes by type."""
186 |         functions = sample_graph.get_nodes_by_type(NodeType.FUNCTION)
187 |         assert len(functions) == 2
188 | 
189 |         classes = sample_graph.get_nodes_by_type(NodeType.CLASS)
190 |         assert len(classes) == 1
191 |         assert classes[0].name == "TestClass"
192 | 
193 |         modules = sample_graph.get_nodes_by_type(NodeType.MODULE)
194 |         assert len(modules) == 1
195 | 
196 |     def test_get_relationships_from_to(self, sample_graph):
197 |         """Test getting relationships from/to nodes."""
198 |         # Test relationships from file node
199 |         file_rels = sample_graph.get_relationships_from("file:main.py")
200 |         assert len(file_rels) == 3  # Contains 3 elements
201 | 
202 |         # Test relationships to helper function
203 |         helper_rels = sample_graph.get_relationships_to("function:main.py:helper:25")
204 |         assert len(helper_rels) == 2  # Contained by file, called by main
205 | 
206 |     def test_centrality_calculations(self, sample_graph):
207 |         """Test centrality calculation methods."""
208 |         # Test betweenness centrality
209 |         betweenness = sample_graph.calculate_centrality()
210 |         assert isinstance(betweenness, dict)
211 |         assert len(betweenness) > 0
212 | 
213 |         # Test PageRank
214 |         pagerank = sample_graph.calculate_pagerank()
215 |         assert isinstance(pagerank, dict)
216 |         assert len(pagerank) > 0
217 | 
218 |         # Test with custom parameters
219 |         pagerank_custom = sample_graph.calculate_pagerank(alpha=0.9, max_iter=50, tol=1e-4)
220 |         assert isinstance(pagerank_custom, dict)
221 | 
222 |         # Test closeness centrality
223 |         closeness = sample_graph.calculate_closeness_centrality()
224 |         assert isinstance(closeness, dict)
225 | 
226 |         # Test eigenvector centrality
227 |         eigenvector = sample_graph.calculate_eigenvector_centrality()
228 |         assert isinstance(eigenvector, dict)
229 | 
230 |     def test_structural_analysis(self, sample_graph):
231 |         """Test structural analysis methods."""
232 |         # Test articulation points
233 |         articulation_points = sample_graph.find_articulation_points()
234 |         assert isinstance(articulation_points, list)
235 | 
236 |         # Test bridges
237 |         bridges = sample_graph.find_bridges()
238 |         assert isinstance(bridges, list)
239 | 
240 |         # Test strongly connected components
241 |         components = sample_graph.get_strongly_connected_components()
242 |         assert isinstance(components, list)
243 | 
244 |         # Test cycle detection
245 |         cycles = sample_graph.detect_cycles()
246 |         assert isinstance(cycles, list)
247 | 
248 |         # Test DAG check
249 |         is_dag = sample_graph.is_directed_acyclic()
250 |         assert isinstance(is_dag, bool)
251 | 
252 |     def test_path_analysis(self, sample_graph):
253 |         """Test path finding and analysis methods."""
254 |         # Test shortest path
255 |         path = sample_graph.find_shortest_path(
256 |             "file:main.py",
257 |             "function:main.py:helper:25"
258 |         )
259 |         assert isinstance(path, list)
260 | 
261 |         # Test all paths
262 |         all_paths = sample_graph.find_all_paths(
263 |             "file:main.py",
264 |             "function:main.py:helper:25",
265 |             max_length=5
266 |         )
267 |         assert isinstance(all_paths, list)
268 | 
269 |         # Test ancestors and descendants
270 |         ancestors = sample_graph.find_ancestors("function:main.py:helper:25")
271 |         assert isinstance(ancestors, set)
272 | 
273 |         descendants = sample_graph.find_descendants("file:main.py")
274 |         assert isinstance(descendants, set)
275 | 
276 |     def test_traversal_algorithms(self, sample_graph):
277 |         """Test DFS and BFS traversal algorithms."""
278 |         # Test DFS
279 |         dfs_nodes = sample_graph.depth_first_search("file:main.py")
280 |         assert isinstance(dfs_nodes, list)
281 |         assert len(dfs_nodes) > 0
282 |         assert "file:main.py" in dfs_nodes
283 | 
284 |         # Test BFS (may fail with some rustworkx configurations, handle gracefully)
285 |         bfs_nodes = sample_graph.breadth_first_search("file:main.py")
286 |         assert isinstance(bfs_nodes, list)
287 |         # BFS should at least include the start node
288 |         if len(bfs_nodes) == 0:
289 |             # If BFS fails, test that it returns empty list gracefully
290 |             assert bfs_nodes == []
291 |         else:
292 |             assert "file:main.py" in bfs_nodes
293 | 
294 |         # Test with visitor function
295 |         visited_nodes = []
296 |         def visitor(node_id):
297 |             visited_nodes.append(node_id)
298 | 
299 |         sample_graph.depth_first_search("file:main.py", visitor_fn=visitor)
300 |         # Visitor may not be called if traversal fails, but shouldn't crash
301 | 
302 |     def test_node_layers(self, sample_graph):
303 |         """Test finding node layers from a source."""
304 |         layers = sample_graph.find_node_layers("file:main.py")
305 |         assert isinstance(layers, dict)
306 |         # Source node may or may not be included depending on rustworkx implementation
307 |         # Just verify we get valid layer structure
308 |         if layers:
309 |             # Should have at least one layer
310 |             assert len(layers) > 0
311 |             # Layer numbers should be non-negative integers
312 |             for layer_num in layers.keys():
313 |                 assert isinstance(layer_num, int)
314 |                 assert layer_num >= 0
315 | 
316 |     def test_dominating_set(self, sample_graph):
317 |         """Test dominating set calculation (degree-based approximation)."""
318 |         dominating_set = sample_graph.find_dominating_set()
319 |         assert isinstance(dominating_set, list)
320 |         assert len(dominating_set) > 0
321 | 
322 |     def test_node_degree(self, sample_graph):
323 |         """Test node degree calculations."""
324 |         file_degree = sample_graph.get_node_degree("file:main.py")
325 |         assert isinstance(file_degree, tuple)
326 |         assert len(file_degree) == 3  # (in_degree, out_degree, total_degree)
327 | 
328 |         # File node should have outgoing edges (contains relationships)
329 |         in_deg, out_deg, total_deg = file_degree
330 |         assert out_deg > 0
331 |         assert total_deg == in_deg + out_deg
332 | 
333 |     def test_connectivity_analysis(self, sample_graph):
334 |         """Test comprehensive connectivity analysis."""
335 |         connectivity = sample_graph.analyze_graph_connectivity()
336 |         assert isinstance(connectivity, dict)
337 | 
338 |         # Check expected structure
339 |         assert "basic_metrics" in connectivity
340 |         assert "connectivity_metrics" in connectivity
341 |         assert "distance_metrics" in connectivity
342 | 
343 |         basic_metrics = connectivity["basic_metrics"]
344 |         assert "num_nodes" in basic_metrics
345 |         assert "num_edges" in basic_metrics
346 |         assert basic_metrics["num_nodes"] > 0
347 | 
348 |     def test_node_connectivity_analysis(self, sample_graph):
349 |         """Test individual node connectivity analysis."""
350 |         node_analysis = sample_graph.analyze_node_connectivity("file:main.py")
351 |         assert isinstance(node_analysis, dict)
352 | 
353 |         # Check expected structure
354 |         assert "degree_analysis" in node_analysis
355 |         assert "reachability" in node_analysis
356 |         assert "distance_analysis" in node_analysis
357 |         assert "structural_importance" in node_analysis
358 | 
359 |     def test_statistics(self, sample_graph):
360 |         """Test graph statistics generation."""
361 |         stats = sample_graph.get_statistics()
362 |         assert isinstance(stats, dict)
363 | 
364 |         # Check expected fields
365 |         assert "total_nodes" in stats
366 |         assert "total_relationships" in stats
367 |         assert "node_types" in stats
368 |         assert "languages" in stats
369 |         assert "relationship_types" in stats
370 | 
371 |         assert stats["total_nodes"] == 4
372 |         assert stats["total_relationships"] == 4
373 | 
374 |     def test_json_serialization(self, sample_graph):
375 |         """Test JSON serialization functionality."""
376 |         # Test basic JSON serialization
377 |         json_str = sample_graph.to_json()
378 |         assert isinstance(json_str, str)
379 |         assert len(json_str) > 0
380 | 
381 |         # Test that it's valid JSON
382 |         json_data = json.loads(json_str)
383 |         assert isinstance(json_data, dict)
384 | 
385 |         # Test with indentation
386 |         json_pretty = sample_graph.to_json(indent=2)
387 |         assert isinstance(json_pretty, str)
388 |         assert len(json_pretty) > len(json_str)  # Should be longer with formatting
389 | 
390 |     def test_dot_serialization(self, sample_graph):
391 |         """Test DOT format serialization."""
392 |         dot_str = sample_graph.to_dot()
393 |         assert isinstance(dot_str, str)
394 |         assert "digraph" in dot_str.lower()
395 |         assert len(dot_str) > 0
396 | 
397 |         # Test with custom attributes
398 |         def custom_node_attr(node):
399 |             return {"label": f"Custom_{node.name}", "color": "red"}
400 | 
401 |         def custom_edge_attr(edge):
402 |             return {"label": edge.relationship_type.value, "style": "dashed"}
403 | 
404 |         custom_dot = sample_graph.to_dot(
405 |             node_attr_fn=custom_node_attr,
406 |             edge_attr_fn=custom_edge_attr
407 |         )
408 |         assert "Custom_" in custom_dot
409 |         assert "dashed" in custom_dot
410 | 
411 |     def test_graphml_serialization(self, sample_graph):
412 |         """Test GraphML serialization."""
413 |         with tempfile.NamedTemporaryFile(mode='w', suffix='.graphml', delete=False) as f:
414 |             temp_filename = f.name
415 | 
416 |         try:
417 |             success = sample_graph.to_graphml(temp_filename)
418 |             assert success is True
419 | 
420 |             # Check that file was created and has content
421 |             assert os.path.exists(temp_filename)
422 |             with open(temp_filename, 'r') as f:
423 |                 content = f.read()
424 |                 assert "graphml" in content.lower()
425 |                 assert len(content) > 0
426 |         finally:
427 |             # Cleanup
428 |             if os.path.exists(temp_filename):
429 |                 os.unlink(temp_filename)
430 | 
431 |     def test_json_deserialization(self, sample_graph):
432 |         """Test JSON deserialization (loading from JSON)."""
433 |         # Serialize to JSON
434 |         json_str = sample_graph.to_json()
435 | 
436 |         # Create new graph and load from JSON
437 |         new_graph = RustworkxCodeGraph()
438 |         success = new_graph.from_json(json_str)
439 | 
440 |         # Note: from_json is a simplified implementation
441 |         # We mainly test that it doesn't crash and follows expected behavior
442 |         assert isinstance(success, bool)
443 | 
444 |     def test_analysis_report_export(self, sample_graph):
445 |         """Test comprehensive analysis report export."""
446 |         with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
447 |             temp_filename = f.name
448 | 
449 |         try:
450 |             success = sample_graph.export_analysis_report(temp_filename, format="json")
451 |             assert success is True
452 | 
453 |             # Check that file was created and has valid JSON
454 |             assert os.path.exists(temp_filename)
455 |             with open(temp_filename, 'r') as f:
456 |                 report = json.load(f)
457 |                 assert "metadata" in report
458 |                 assert "statistics" in report
459 |                 assert "centrality_analysis" in report
460 |                 assert "structural_analysis" in report
461 |         finally:
462 |             if os.path.exists(temp_filename):
463 |                 os.unlink(temp_filename)
464 | 
465 |     def test_error_handling(self, sample_graph):
466 |         """Test error handling for various edge cases."""
467 |         graph = RustworkxCodeGraph()
468 | 
469 |         # Test with empty graph
470 |         assert graph.calculate_centrality() == {}
471 |         assert graph.find_shortest_path("nonexistent1", "nonexistent2") == []
472 |         assert graph.find_ancestors("nonexistent") == set()
473 |         assert graph.get_node_degree("nonexistent") == (0, 0, 0)
474 | 
475 |         # Test malformed operations
476 |         empty_json = graph.to_json()
477 |         assert isinstance(empty_json, str)
478 | 
479 |         # Test clear functionality
480 |         sample_graph.clear()
481 |         assert len(sample_graph.nodes) == 0
482 |         assert len(sample_graph.relationships) == 0
483 | 
484 |     def test_large_graph_performance(self):
485 |         """Test performance with a larger graph."""
486 |         graph = RustworkxCodeGraph()
487 | 
488 |         # Create a moderately sized graph (100 nodes, ~200 relationships)
489 |         nodes = []
490 |         for i in range(100):
491 |             node = UniversalNode(
492 |                 id=f"node_{i}",
493 |                 name=f"Node_{i}",
494 |                 node_type=NodeType.FUNCTION,
495 |                 location=UniversalLocation(
496 |                     file_path=f"/test/file_{i//10}.py",
497 |                     start_line=i+1,  # Line numbers start at 1
498 |                     end_line=i+6,
499 |                     language="Python"
500 |                 ),
501 |                 language="Python",
502 |                 complexity=i % 10
503 |             )
504 |             nodes.append(node)
505 |             graph.add_node(node)
506 | 
507 |         # Add relationships (each node calls next 2 nodes)
508 |         for i in range(98):
509 |             for j in range(1, 3):
510 |                 if i + j < 100:
511 |                     rel = UniversalRelationship(
512 |                         id=f"calls_{i}_{i+j}",
513 |                         source_id=f"node_{i}",
514 |                         target_id=f"node_{i+j}",
515 |                         relationship_type=RelationshipType.CALLS
516 |                     )
517 |                     graph.add_relationship(rel)
518 | 
519 |         # Test that operations complete without errors
520 |         stats = graph.get_statistics()
521 |         assert stats["total_nodes"] == 100
522 | 
523 |         centrality = graph.calculate_centrality()
524 |         assert len(centrality) > 0
525 | 
526 |         pagerank = graph.calculate_pagerank()
527 |         assert len(pagerank) > 0
528 | 
529 |     @patch('rustworkx.node_link_json')
530 |     def test_fallback_mechanisms(self, mock_node_link_json, sample_graph):
531 |         """Test fallback mechanisms when rustworkx functions are unavailable."""
532 |         # Mock rustworkx function to raise AttributeError
533 |         mock_node_link_json.side_effect = AttributeError("Function not available")
534 | 
535 |         # Test JSON serialization fallback
536 |         json_str = sample_graph.to_json()
537 |         assert isinstance(json_str, str)
538 |         assert len(json_str) > 0
539 | 
540 |         # Should use fallback implementation
541 |         json_data = json.loads(json_str)
542 |         assert "nodes" in json_data
543 |         assert "edges" in json_data
544 | 
545 |     def test_weight_functions(self, sample_graph):
546 |         """Test weighted graph operations."""
547 |         # Test with custom weight function
548 |         def weight_fn(edge_data):
549 |             if hasattr(edge_data, 'strength'):
550 |                 return edge_data.strength
551 |             return 1.0
552 | 
553 |         # Test Bellman-Ford path lengths
554 |         paths = sample_graph.calculate_bellman_ford_path_lengths(weight_fn)
555 |         assert isinstance(paths, dict)
556 | 
557 |         # Test weighted shortest paths
558 |         weighted_paths = sample_graph.calculate_weighted_shortest_paths(
559 |             "file:main.py",
560 |             weight_fn
561 |         )
562 |         assert isinstance(weighted_paths, dict)
563 | 
564 |         # Test negative cycle detection
565 |         has_negative_cycles = sample_graph.detect_negative_cycles(weight_fn)
566 |         assert isinstance(has_negative_cycles, bool)
567 | 
568 |     def test_topological_operations(self, sample_graph):
569 |         """Test topological operations."""
570 |         # Test topological sort
571 |         topo_order = sample_graph.topological_sort()
572 |         assert isinstance(topo_order, list)
573 | 
574 |     def test_distance_matrix(self, sample_graph):
575 |         """Test distance matrix calculations."""
576 |         # Test Floyd-Warshall distance matrix
577 |         distance_matrix = sample_graph.calculate_graph_distance_matrix()
578 |         assert isinstance(distance_matrix, dict)
579 | 
580 |         # Should have entries for reachable node pairs
581 |         if distance_matrix:
582 |             # Pick first entry to validate structure
583 |             first_source = next(iter(distance_matrix.keys()))
584 |             first_targets = distance_matrix[first_source]
585 |             assert isinstance(first_targets, dict)
586 | 
587 | 
588 | # Integration tests that require actual rustworkx
589 | class TestRustworkxIntegration:
590 |     """Integration tests that test actual rustworkx functionality."""
591 | 
592 |     def test_rustworkx_available(self):
593 |         """Test that rustworkx is available and working."""
594 |         try:
595 |             import rustworkx as rx
596 |             graph = rx.PyDiGraph()
597 |             node_idx = graph.add_node("test")
598 |             assert node_idx == 0
599 |         except ImportError:
600 |             pytest.skip("rustworkx not available")
601 | 
602 |     def test_real_rustworkx_operations(self):
603 |         """Test operations with real rustworkx backend."""
604 |         try:
605 |             graph = RustworkxCodeGraph()
606 | 
607 |             # Add a simple node
608 |             node = UniversalNode(
609 |                 id="test_node",
610 |                 name="Test",
611 |                 node_type=NodeType.FUNCTION,
612 |                 location=UniversalLocation(
613 |                     file_path="/test.py",
614 |                     start_line=1,
615 |                     end_line=5,
616 |                     language="Python"
617 |                 ),
618 |                 language="Python"
619 |             )
620 |             graph.add_node(node)
621 | 
622 |             # Test that rustworkx graph operations work
623 |             assert len(graph.nodes) == 1
624 |             assert len(graph.graph) == 1  # rustworkx graph should have 1 node
625 | 
626 |             # Verify node has rustworkx index
627 |             assert hasattr(node, '_rustworkx_index')
628 | 
629 |             # Test rustworkx-specific functionality
630 |             stats = graph.get_statistics()
631 |             assert stats["total_nodes"] == 1
632 | 
633 |         except ImportError:
634 |             pytest.skip("rustworkx not available")
635 | 
636 | 
637 | if __name__ == "__main__":
638 |     pytest.main([__file__, "-v"])
639 | 


--------------------------------------------------------------------------------
/src/code_graph_mcp/universal_ast.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Universal AST Analyzer
  3 | 
  4 | High-level analyzer that provides cross-language analysis capabilities.
  5 | Builds on the universal graph to provide code intelligence features.
  6 | """
  7 | 
  8 | import logging
  9 | from collections import defaultdict
 10 | from functools import lru_cache
 11 | from pathlib import Path
 12 | from typing import Any, Dict, List, Set, Union
 13 | 
 14 | from .universal_graph import (
 15 |     NodeType,
 16 |     RelationshipType,
 17 |     UniversalNode,
 18 | )
 19 | from .universal_parser import UniversalParser
 20 | 
 21 | logger = logging.getLogger(__name__)
 22 | 
 23 | 
 24 | class UniversalASTAnalyzer:
 25 |     """High-level analyzer providing cross-language analysis capabilities."""
 26 | 
 27 |     def __init__(self, project_root: Path):
 28 |         self.project_root = project_root
 29 |         self.parser = UniversalParser()
 30 |         self.graph = self.parser.graph
 31 |         self._analysis_cache: Dict[str, Any] = {}
 32 | 
 33 |     def analyze_project(self, recursive: bool = True) -> Dict[str, Any]:
 34 |         """Analyze entire project and return comprehensive statistics."""
 35 |         logger.info("Analyzing project: %s", self.project_root)
 36 | 
 37 |         # Parse all files
 38 |         parsed_files = self.parser.parse_directory(self.project_root, recursive)
 39 | 
 40 |         # Get basic statistics
 41 |         stats = self.graph.get_statistics()
 42 | 
 43 |         # Add additional analysis
 44 |         stats.update({
 45 |             "parsed_files": parsed_files,
 46 |             "code_smells": self.detect_code_smells(),
 47 |             "complexity_analysis": self.analyze_complexity(),
 48 |             "dependency_analysis": self.analyze_dependencies(),
 49 |             "quality_metrics": self.calculate_quality_metrics(),
 50 |             "language_distribution": self.get_language_distribution(),
 51 |         })
 52 | 
 53 |         logger.info("Analysis complete: %d nodes, %d relationships",
 54 |                    stats["total_nodes"], stats["total_relationships"])
 55 | 
 56 |         return stats
 57 | 
 58 |     def detect_code_smells(self) -> Dict[str, List[Dict[str, Any]]]:
 59 |         """Detect various code smells across all languages."""
 60 |         smells = {
 61 |             "long_functions": [],
 62 |             "complex_functions": [],
 63 |             "duplicate_logic": [],
 64 |             "large_classes": [],
 65 |             "god_classes": [],
 66 |             "dead_code": [],
 67 |             "naming_issues": [],
 68 |         }
 69 | 
 70 |         # Analyze functions
 71 |         functions = self.graph.get_nodes_by_type(NodeType.FUNCTION)
 72 |         for func in functions:
 73 |             # Long functions (>50 lines)
 74 |             if func.line_count > 50:
 75 |                 smells["long_functions"].append({
 76 |                     "name": func.name,
 77 |                     "location": f"{func.location.file_path}:{func.location.start_line}",
 78 |                     "line_count": func.line_count,
 79 |                     "language": func.language,
 80 |                     "severity": "high" if func.line_count > 100 else "medium"
 81 |                 })
 82 | 
 83 |             # Complex functions (high cyclomatic complexity)
 84 |             if func.complexity > 15:
 85 |                 smells["complex_functions"].append({
 86 |                     "name": func.name,
 87 |                     "location": f"{func.location.file_path}:{func.location.start_line}",
 88 |                     "complexity": func.complexity,
 89 |                     "language": func.language,
 90 |                     "severity": "high" if func.complexity > 20 else "medium"
 91 |                 })
 92 | 
 93 |             # Naming issues (single letter names, etc.)
 94 |             if len(func.name) <= 2 and func.name not in ["id", "x", "y", "i", "j", "k"]:
 95 |                 smells["naming_issues"].append({
 96 |                     "name": func.name,
 97 |                     "location": f"{func.location.file_path}:{func.location.start_line}",
 98 |                     "issue": "Very short function name",
 99 |                     "language": func.language,
100 |                     "severity": "low"
101 |                 })
102 | 
103 |         # Analyze classes
104 |         classes = self.graph.get_nodes_by_type(NodeType.CLASS)
105 |         for cls in classes:
106 |             # Get methods in this class
107 |             class_methods = [
108 |                 rel.target_id for rel in self.graph.get_relationships_from(cls.id)
109 |                 if rel.relationship_type == RelationshipType.CONTAINS
110 |             ]
111 |             method_count = len(class_methods)
112 | 
113 |             # Large classes (many methods)
114 |             if method_count > 20:
115 |                 smells["large_classes"].append({
116 |                     "name": cls.name,
117 |                     "location": f"{cls.location.file_path}:{cls.location.start_line}",
118 |                     "method_count": method_count,
119 |                     "language": cls.language,
120 |                     "severity": "high" if method_count > 30 else "medium"
121 |                 })
122 | 
123 |             # God classes (too many responsibilities)
124 |             if method_count > 30 and cls.line_count > 500:
125 |                 smells["god_classes"].append({
126 |                     "name": cls.name,
127 |                     "location": f"{cls.location.file_path}:{cls.location.start_line}",
128 |                     "method_count": method_count,
129 |                     "line_count": cls.line_count,
130 |                     "language": cls.language,
131 |                     "severity": "critical"
132 |                 })
133 | 
134 |         # Find duplicate logic patterns
135 |         smells["duplicate_logic"] = self._find_duplicate_patterns(functions)
136 | 
137 |         # Find potentially dead code
138 |         smells["dead_code"] = self._find_dead_code()
139 | 
140 |         return smells
141 | 
142 |     @lru_cache(maxsize=10000)
143 |     def analyze_complexity(self, threshold: int = 10) -> Dict[str, Any]:
144 |         """Analyze code complexity across the project with LRU caching."""
145 |         functions = self.graph.get_nodes_by_type(NodeType.FUNCTION)
146 | 
147 |         if not functions:
148 |             return {
149 |                 "total_functions": 0,
150 |                 "average_complexity": 0.0,
151 |                 "max_complexity": 0,
152 |                 "high_complexity_functions": [],
153 |                 "complexity_distribution": {},
154 |             }
155 | 
156 |         complexities = [func.complexity for func in functions if func.complexity > 0]
157 | 
158 |         if not complexities:
159 |             return {
160 |                 "total_functions": len(functions),
161 |                 "average_complexity": 0.0,
162 |                 "max_complexity": 0,
163 |                 "high_complexity_functions": [],
164 |                 "complexity_distribution": {},
165 |             }
166 | 
167 |         # Calculate distribution
168 |         distribution = defaultdict(int)
169 |         for complexity in complexities:
170 |             if complexity <= 5:
171 |                 distribution["simple"] += 1
172 |             elif complexity <= 10:
173 |                 distribution["moderate"] += 1
174 |             elif complexity <= 20:
175 |                 distribution["complex"] += 1
176 |             else:
177 |                 distribution["very_complex"] += 1
178 | 
179 |         # Find high complexity functions
180 |         high_complexity = [
181 |             {
182 |                 "name": func.name,
183 |                 "complexity": func.complexity,
184 |                 "location": f"{func.location.file_path}:{func.location.start_line}",
185 |                 "language": func.language,
186 |                 "risk_level": "critical" if func.complexity > 25 else "high"
187 |             }
188 |             for func in functions
189 |             if func.complexity >= threshold
190 |         ]
191 | 
192 |         high_complexity.sort(key=lambda x: x["complexity"], reverse=True)
193 | 
194 |         return {
195 |             "total_functions": len(functions),
196 |             "average_complexity": sum(complexities) / len(complexities),
197 |             "max_complexity": max(complexities),
198 |             "high_complexity_functions": high_complexity,
199 |             "complexity_distribution": dict(distribution),
200 |             "functions_above_threshold": len(high_complexity)
201 |         }
202 | 
203 |     def analyze_dependencies(self) -> Dict[str, Any]:
204 |         """Analyze dependencies and coupling between modules."""
205 |         import_relationships = self.graph.get_relationships_by_type(RelationshipType.IMPORTS)
206 | 
207 |         # Build dependency graph
208 |         dependencies = defaultdict(set)
209 |         reverse_dependencies = defaultdict(set)
210 | 
211 |         for rel in import_relationships:
212 |             source_node = self.graph.get_node(rel.source_id)
213 |             if source_node and source_node.node_type == NodeType.MODULE:
214 |                 target = rel.target_id.replace("module:", "")
215 |                 dependencies[source_node.name].add(target)
216 |                 reverse_dependencies[target].add(source_node.name)
217 | 
218 |         # Calculate metrics
219 |         total_dependencies = sum(len(deps) for deps in dependencies.values())
220 | 
221 |         # Find highly coupled modules
222 |         highly_coupled = [
223 |             {
224 |                 "module": module,
225 |                 "dependency_count": len(deps),
226 |                 "dependencies": list(deps),
227 |                 "severity": "high" if len(deps) > 10 else "medium"
228 |             }
229 |             for module, deps in dependencies.items()
230 |             if len(deps) > 5
231 |         ]
232 | 
233 |         # Find modules with many dependents
234 |         popular_modules = [
235 |             {
236 |                 "module": module,
237 |                 "dependent_count": len(dependents),
238 |                 "dependents": list(dependents)
239 |             }
240 |             for module, dependents in reverse_dependencies.items()
241 |             if len(dependents) > 3
242 |         ]
243 | 
244 |         # Detect circular dependencies
245 |         circular_deps = self._detect_circular_dependencies(dependencies)
246 | 
247 |         return {
248 |             "total_modules": len(dependencies),
249 |             "total_dependencies": total_dependencies,
250 |             "average_dependencies_per_module": total_dependencies / len(dependencies) if dependencies else 0,
251 |             "highly_coupled_modules": highly_coupled,
252 |             "popular_modules": popular_modules,
253 |             "circular_dependencies": circular_deps,
254 |             "dependency_graph": {k: list(v) for k, v in dependencies.items()}
255 |         }
256 | 
257 |     def calculate_quality_metrics(self) -> Dict[str, Any]:
258 |         """Calculate overall code quality metrics."""
259 |         functions = self.graph.get_nodes_by_type(NodeType.FUNCTION)
260 |         modules = self.graph.get_nodes_by_type(NodeType.MODULE)
261 | 
262 |         if not functions:
263 |             return {
264 |                 "maintainability_index": 0,
265 |                 "technical_debt_ratio": 0,
266 |                 "test_coverage_estimate": 0,
267 |                 "documentation_ratio": 0,
268 |                 "code_duplication_ratio": 0
269 |             }
270 | 
271 |         # Calculate maintainability index (simplified)
272 |         complexities = [func.complexity for func in functions if func.complexity > 0]
273 |         avg_complexity = sum(complexities) / len(complexities) if complexities else 1
274 | 
275 |         total_lines = sum(node.line_count for node in self.graph.nodes.values() if node.line_count > 0)
276 | 
277 |         # Maintainability index (0-100, higher is better)
278 |         maintainability = max(0, 100 - (avg_complexity * 5) - (total_lines / 1000))
279 | 
280 |         # Technical debt ratio (estimated based on code smells)
281 |         code_smells = self.detect_code_smells()
282 |         total_smells = sum(len(smells) for smells in code_smells.values())
283 |         debt_ratio = min(100, (total_smells / len(functions)) * 100) if functions else 0
284 | 
285 |         # Documentation ratio (functions with docstrings)
286 |         documented_functions = len([f for f in functions if f.docstring])
287 |         doc_ratio = (documented_functions / len(functions)) * 100 if functions else 0
288 | 
289 |         # Estimate test coverage based on file patterns
290 |         test_files = [
291 |             node for node in modules
292 |             if any(pattern in node.name.lower() for pattern in ["test", "spec", "_test", ".test"])
293 |         ]
294 |         test_coverage_estimate = min(100, (len(test_files) / len(modules)) * 200) if modules else 0
295 | 
296 |         # Calculate duplication ratio based on duplicate patterns found
297 |         code_smells = self.detect_code_smells()
298 |         duplicate_patterns = code_smells.get("duplicate_logic", [])
299 |         total_functions = len(self.graph.get_nodes_by_type(NodeType.FUNCTION))
300 | 
301 |         duplicate_function_count = sum(len(pattern["functions"]) for pattern in duplicate_patterns)
302 |         duplication_ratio = (duplicate_function_count / total_functions * 100) if total_functions > 0 else 0
303 | 
304 |         return {
305 |             "maintainability_index": round(maintainability, 2),
306 |             "technical_debt_ratio": round(debt_ratio, 2),
307 |             "test_coverage_estimate": round(test_coverage_estimate, 2),
308 |             "documentation_ratio": round(doc_ratio, 2),
309 |             "code_duplication_ratio": round(duplication_ratio, 2),
310 |             "total_code_smells": total_smells,
311 |             "quality_score": round(self._calculate_normalized_quality_score(
312 |                 maintainability, doc_ratio, test_coverage_estimate, debt_ratio, duplication_ratio
313 |             ), 2)
314 |         }
315 | 
316 |     def get_language_distribution(self) -> Dict[str, Any]:
317 |         """Get distribution of languages in the project."""
318 |         language_stats: Dict[str, Dict[str, Union[int, float]]] = defaultdict(lambda: {
319 |             "files": 0,
320 |             "nodes": 0,
321 |             "functions": 0,
322 |             "classes": 0,
323 |             "lines": 0
324 |         })
325 | 
326 |         for node in self.graph.nodes.values():
327 |             if node.language:
328 |                 lang = node.language
329 |                 language_stats[lang]["nodes"] += 1
330 |                 language_stats[lang]["lines"] += node.line_count
331 | 
332 |                 if node.node_type == NodeType.MODULE:
333 |                     language_stats[lang]["files"] += 1
334 |                 elif node.node_type == NodeType.FUNCTION:
335 |                     language_stats[lang]["functions"] += 1
336 |                 elif node.node_type == NodeType.CLASS:
337 |                     language_stats[lang]["classes"] += 1
338 | 
339 |         # Calculate percentages
340 |         total_files = sum(stats["files"] for stats in language_stats.values())
341 |         total_lines = sum(stats["lines"] for stats in language_stats.values())
342 | 
343 |         for lang, stats in language_stats.items():
344 |             stats["file_percentage"] = (stats["files"] / total_files * 100) if total_files else 0.0
345 |             stats["line_percentage"] = (stats["lines"] / total_lines * 100) if total_lines else 0.0
346 | 
347 |         # Sort by number of lines (descending)
348 |         sorted_languages = sorted(
349 |             language_stats.items(),
350 |             key=lambda x: x[1]["lines"],
351 |             reverse=True
352 |         )
353 | 
354 |         return {
355 |             "languages": dict(sorted_languages),
356 |             "primary_language": sorted_languages[0][0] if sorted_languages else None,
357 |             "total_languages": len(language_stats),
358 |             "polyglot_score": min(len(language_stats), 10) * 10  # 0-100 score
359 |         }
360 | 
361 |     def _calculate_normalized_quality_score(self, maintainability: float, doc_ratio: float,
362 |                                           test_coverage: float, debt_ratio: float,
363 |                                           duplication_ratio: float) -> float:
364 |         """Calculate a normalized quality score between 0 and 100."""
365 |         # Normalize all inputs to 0-100 scale
366 |         maintainability = max(0, min(100, maintainability))
367 |         doc_ratio = max(0, min(100, doc_ratio))
368 |         test_coverage = max(0, min(100, test_coverage))
369 |         debt_ratio = max(0, min(100, debt_ratio))
370 |         duplication_ratio = max(0, min(100, duplication_ratio))
371 | 
372 |         # Calculate weighted score (positive factors - negative factors)
373 |         positive_score = (maintainability * 0.4 + doc_ratio * 0.2 + test_coverage * 0.3)
374 |         negative_score = (debt_ratio * 0.3 + duplication_ratio * 0.2)
375 | 
376 |         # Final score between 0 and 100
377 |         quality_score = positive_score - negative_score
378 |         return max(0, min(100, quality_score))
379 | 
380 |     def find_similar_functions(self, function_name: str, similarity_threshold: float = 0.7) -> List[Dict[str, Any]]:
381 |         """Find functions similar to the given function."""
382 |         target_function = None
383 |         for node in self.graph.nodes.values():
384 |             if node.name == function_name and node.node_type == NodeType.FUNCTION:
385 |                 target_function = node
386 |                 break
387 | 
388 |         if not target_function:
389 |             return []
390 | 
391 |         similar_functions = []
392 |         functions = self.graph.get_nodes_by_type(NodeType.FUNCTION)
393 | 
394 |         for func in functions:
395 |             if func.id == target_function.id:
396 |                 continue
397 | 
398 |             similarity = self._calculate_function_similarity(target_function, func)
399 |             if similarity >= similarity_threshold:
400 |                 similar_functions.append({
401 |                     "name": func.name,
402 |                     "location": f"{func.location.file_path}:{func.location.start_line}",
403 |                     "language": func.language,
404 |                     "similarity": similarity,
405 |                     "complexity": func.complexity
406 |                 })
407 | 
408 |         return sorted(similar_functions, key=lambda x: x["similarity"], reverse=True)
409 | 
410 |     def _find_duplicate_patterns(self, functions: List[UniversalNode]) -> List[Dict[str, Any]]:
411 |         """Find potentially duplicate code patterns."""
412 |         duplicates = []
413 | 
414 |         # Group functions by similar characteristics
415 |         function_groups = defaultdict(list)
416 | 
417 |         for func in functions:
418 |             # Group by complexity and line count (simplified)
419 |             if func.complexity > 5 and func.line_count > 10:
420 |                 key = (func.complexity, func.line_count // 5 * 5)  # Round to nearest 5
421 |                 function_groups[key].append(func)
422 | 
423 |         # Find groups with multiple functions
424 |         for key, group in function_groups.items():
425 |             if len(group) > 1:
426 |                 duplicates.append({
427 |                     "pattern": f"Functions with complexity {key[0]} and ~{key[1]} lines",
428 |                     "count": len(group),
429 |                     "functions": [
430 |                         {
431 |                             "name": func.name,
432 |                             "location": f"{func.location.file_path}:{func.location.start_line}",
433 |                             "language": func.language
434 |                         }
435 |                         for func in group
436 |                     ],
437 |                     "severity": "medium" if len(group) < 4 else "high"
438 |                 })
439 | 
440 |         return duplicates
441 | 
442 |     def _find_dead_code(self) -> List[Dict[str, Any]]:
443 |         """Find potentially dead (unused) code."""
444 |         dead_code = []
445 | 
446 |         # Find functions that are never called
447 |         all_functions = {node.id: node for node in self.graph.get_nodes_by_type(NodeType.FUNCTION)}
448 |         called_functions = set()
449 | 
450 |         # Find all function calls
451 |         call_relationships = self.graph.get_relationships_by_type(RelationshipType.CALLS)
452 |         for rel in call_relationships:
453 |             called_functions.add(rel.target_id)
454 | 
455 |         # Functions that are defined but never called
456 |         for func_id, func in all_functions.items():
457 |             if func_id not in called_functions:
458 |                 # Skip entry points and special methods
459 |                 if not self._is_entry_point(func):
460 |                     dead_code.append({
461 |                         "name": func.name,
462 |                         "type": "function",
463 |                         "location": f"{func.location.file_path}:{func.location.start_line}",
464 |                         "language": func.language,
465 |                         "reason": "Never called",
466 |                         "severity": "medium"
467 |                     })
468 | 
469 |         return dead_code
470 | 
471 |     def _detect_circular_dependencies(self, dependencies: Dict[str, Set[str]]) -> List[Dict[str, Any]]:
472 |         """Detect circular dependencies using DFS."""
473 |         circular_deps = []
474 |         visited = set()
475 |         rec_stack = set()
476 | 
477 |         def dfs(node: str, path: List[str]) -> None:
478 |             if node in rec_stack:
479 |                 # Found a cycle
480 |                 cycle_start = path.index(node)
481 |                 cycle = path[cycle_start:] + [node]
482 |                 circular_deps.append({
483 |                     "cycle": cycle,
484 |                     "length": len(cycle) - 1,
485 |                     "severity": "high" if len(cycle) <= 3 else "medium"
486 |                 })
487 |                 return
488 | 
489 |             if node in visited:
490 |                 return
491 | 
492 |             visited.add(node)
493 |             rec_stack.add(node)
494 | 
495 |             for neighbor in dependencies.get(node, set()):
496 |                 dfs(neighbor, path + [node])
497 | 
498 |             rec_stack.remove(node)
499 | 
500 |         for module in dependencies:
501 |             if module not in visited:
502 |                 dfs(module, [])
503 | 
504 |         return circular_deps
505 | 
506 |     def _calculate_function_similarity(self, func1: UniversalNode, func2: UniversalNode) -> float:
507 |         """Calculate similarity between two functions."""
508 |         # Simple similarity based on multiple factors
509 |         similarity_factors = []
510 | 
511 |         # Name similarity (Levenshtein distance)
512 |         name_similarity = 1.0 - (self._levenshtein_distance(func1.name, func2.name) / max(len(func1.name), len(func2.name)))
513 |         similarity_factors.append(name_similarity * 0.3)
514 | 
515 |         # Complexity similarity
516 |         if func1.complexity > 0 and func2.complexity > 0:
517 |             complexity_diff = abs(func1.complexity - func2.complexity)
518 |             complexity_similarity = 1.0 / (1.0 + complexity_diff)
519 |             similarity_factors.append(complexity_similarity * 0.2)
520 | 
521 |         # Line count similarity
522 |         if func1.line_count > 0 and func2.line_count > 0:
523 |             line_diff = abs(func1.line_count - func2.line_count)
524 |             line_similarity = 1.0 / (1.0 + line_diff / 10.0)
525 |             similarity_factors.append(line_similarity * 0.2)
526 | 
527 |         # Language similarity
528 |         if func1.language == func2.language:
529 |             similarity_factors.append(0.3)
530 | 
531 |         return sum(similarity_factors) if similarity_factors else 0.0
532 | 
533 |     def _levenshtein_distance(self, s1: str, s2: str) -> int:
534 |         """Calculate Levenshtein distance between two strings."""
535 |         if len(s1) < len(s2):
536 |             return self._levenshtein_distance(s2, s1)
537 | 
538 |         if len(s2) == 0:
539 |             return len(s1)
540 | 
541 |         previous_row = list(range(len(s2) + 1))
542 |         for i, c1 in enumerate(s1):
543 |             current_row = [i + 1]
544 |             for j, c2 in enumerate(s2):
545 |                 insertions = previous_row[j + 1] + 1
546 |                 deletions = current_row[j] + 1
547 |                 substitutions = previous_row[j] + (c1 != c2)
548 |                 current_row.append(min(insertions, deletions, substitutions))
549 |             previous_row = current_row
550 | 
551 |         return previous_row[-1]
552 | 
553 |     def _is_entry_point(self, func: UniversalNode) -> bool:
554 |         """Check if a function is likely an entry point."""
555 |         entry_point_patterns = [
556 |             "main", "__main__", "init", "__init__", "setup", "run",
557 |             "start", "begin", "execute", "handler", "callback"
558 |         ]
559 | 
560 |         return any(
561 |             pattern in func.name.lower()
562 |             for pattern in entry_point_patterns
563 |         )
564 | 
565 |     def export_analysis_report(self, output_path: Path) -> None:
566 |         """Export comprehensive analysis report to a file."""
567 |         analysis = self.analyze_project()
568 | 
569 |         report_content = f"""# Code Analysis Report
570 | 
571 | ## Project Overview
572 | - **Project Root**: {self.project_root}
573 | - **Total Files Parsed**: {analysis['parsed_files']}
574 | - **Total Languages**: {analysis['language_distribution']['total_languages']}
575 | - **Primary Language**: {analysis['language_distribution']['primary_language']}
576 | 
577 | ## Code Statistics
578 | - **Total Nodes**: {analysis['total_nodes']:,}
579 | - **Total Relationships**: {analysis['total_relationships']:,}
580 | - **Functions**: {analysis['nodes_by_type'].get('function', 0):,}
581 | - **Classes**: {analysis['nodes_by_type'].get('class', 0):,}
582 | 
583 | ## Quality Metrics
584 | - **Maintainability Index**: {analysis['quality_metrics']['maintainability_index']}/100
585 | - **Technical Debt Ratio**: {analysis['quality_metrics']['technical_debt_ratio']}%
586 | - **Documentation Ratio**: {analysis['quality_metrics']['documentation_ratio']}%
587 | - **Quality Score**: {analysis['quality_metrics']['quality_score']}/100
588 | 
589 | ## Code Smells Detected
590 | - **Long Functions**: {len(analysis['code_smells']['long_functions'])}
591 | - **Complex Functions**: {len(analysis['code_smells']['complex_functions'])}
592 | - **Large Classes**: {len(analysis['code_smells']['large_classes'])}
593 | - **Potential Duplicates**: {len(analysis['code_smells']['duplicate_logic'])}
594 | 
595 | ## Complexity Analysis
596 | - **Average Complexity**: {analysis['complexity_analysis']['average_complexity']:.2f}
597 | - **Max Complexity**: {analysis['complexity_analysis']['max_complexity']}
598 | - **High Complexity Functions**: {analysis['complexity_analysis']['functions_above_threshold']}
599 | 
600 | ## Dependencies
601 | - **Total Modules**: {analysis['dependency_analysis']['total_modules']}
602 | - **Average Dependencies**: {analysis['dependency_analysis']['average_dependencies_per_module']:.2f}
603 | - **Circular Dependencies**: {len(analysis['dependency_analysis']['circular_dependencies'])}
604 | """
605 | 
606 |         output_path.write_text(report_content, encoding='utf-8')
607 |         logger.info("Analysis report exported to: %s", output_path)
608 | 
609 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
  1 | # Changelog
  2 | 
  3 | All notable changes to this project will be documented in this file.
  4 | 
  5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
  6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
  7 | 
  8 | ## [1.2.3] - 2025-01-27
  9 | 
 10 | ### 🔧 Patch Release: Complete JSON Serialization Fix
 11 | 
 12 | This patch release fixes the `from_json()` method to properly reconstruct graph objects from JSON data, completing the architectural migration.
 13 | 
 14 | #### 🛠️ JSON Deserialization Fix
 15 | - **Complete Object Reconstruction** - `from_json()` now properly recreates `UniversalNode` and `UniversalRelationship` objects from JSON data
 16 | - **Proper Index Storage** - Rustworkx indices are correctly stored in reconstructed objects (`_rustworkx_index`, `_rustworkx_edge_index`)
 17 | - **Full Graph Restoration** - Restored graphs are fully functional with all operations working correctly
 18 | - **Robust Error Handling** - Graceful handling of malformed JSON data with detailed logging
 19 | 
 20 | #### 🎯 Technical Implementation
 21 | - **Object Recreation** - Reconstructs `UniversalLocation`, `UniversalNode`, and `UniversalRelationship` from JSON attributes
 22 | - **Index Management** - Properly assigns rustworkx indices to reconstructed objects
 23 | - **Graph Consistency** - Ensures restored graph maintains all architectural patterns
 24 | - **Import Addition** - Added `UniversalLocation` import for proper object reconstruction
 25 | 
 26 | #### ✅ Verification
 27 | - **JSON Round-trip** - Serialization → Deserialization → Full functionality confirmed
 28 | - **Graph Operations** - All methods work correctly on restored graphs
 29 | - **Test Suite** - JSON serialization/deserialization tests passing
 30 | - **Production Ready** - Complete and robust JSON handling
 31 | 
 32 | ---
 33 | 
 34 | ## [1.2.2] - 2025-01-27
 35 | 
 36 | ### 🔧 Patch Release: Complete Architectural Migration
 37 | 
 38 | This patch release completes the architectural migration by eliminating the final references to deprecated edge mapping dictionaries.
 39 | 
 40 | #### 🛠️ Final Fixes
 41 | - **Edge Mapping Cleanup** - Removed final `edge_id_to_index` and `index_to_edge_id` references in `from_json()` method
 42 | - **Consistent Architecture** - All methods now use direct graph storage patterns consistently
 43 | - **Complete Migration** - Architectural redesign fully completed with no legacy mapping references
 44 | 
 45 | #### 🎯 Technical Details
 46 | - **`from_json()` Method** - Fixed lines 1376-1377 to use `relationship._rustworkx_edge_index` instead of deprecated dictionaries
 47 | - **Edge Index Storage** - Consistent use of relationship object attributes for edge index storage
 48 | - **Zero Legacy References** - No remaining references to old index mapping system
 49 | 
 50 | #### ✅ Verification
 51 | - **All Methods Working** - Complete test suite confirms no AttributeError crashes
 52 | - **Architectural Consistency** - All graph operations use unified direct storage approach
 53 | - **Production Stability** - Final cleanup ensures long-term maintainability
 54 | 
 55 | ---
 56 | 
 57 | ## [1.2.1] - 2025-01-27
 58 | 
 59 | ### 🐛 Critical Bug Fix Release: Resolved Tool Hanging Issues
 60 | 
 61 | This critical patch release fixes **AttributeError crashes** that were causing MCP tools to hang and timeout, resolving a major stability issue introduced during architectural improvements.
 62 | 
 63 | #### 🔥 Critical Fixes
 64 | - **Tool Hanging Resolution** - Fixed 6 methods that were crashing with `AttributeError: 'RustworkxCodeGraph' object has no attribute 'index_to_node_id'`
 65 | - **Graph Method Stability** - All graph analysis methods now work correctly without crashes
 66 | - **MCP Tool Reliability** - Tools no longer hang or timeout due to internal crashes
 67 | - **Complete Architecture Migration** - Finished migration from index mapping dictionaries to direct graph storage
 68 | 
 69 | #### 🛠️ Methods Fixed
 70 | - **`find_bridges()`** - Fixed `self.index_to_node_id.get(edge[0])` → `self.graph[edge[0]]`
 71 | - **`calculate_graph_distance_matrix()`** - Fixed index mapping loops to use direct graph iteration
 72 | - **`calculate_bellman_ford_path_lengths()`** - Fixed index lookups to use `self.graph[index]`
 73 | - **`calculate_weighted_shortest_paths()`** - Fixed `self.node_id_to_index.get()` → `getattr(node, '_rustworkx_index')`
 74 | - **`find_node_layers()`** - Fixed index mapping to use proper node lookup pattern
 75 | - **`from_json()`** - Completely rewrote to use direct graph storage without index dictionaries
 76 | 
 77 | #### 🎯 Root Cause Analysis
 78 | - **Issue**: Incomplete migration from old index mapping system (`index_to_node_id`, `node_id_to_index`) to new direct storage approach
 79 | - **Impact**: Methods crashed with AttributeError when called, causing tools to hang and timeout
 80 | - **Solution**: Consistent use of `self.graph[index]` to get node ID from rustworkx index and `getattr(node, '_rustworkx_index')` for reverse lookup
 81 | 
 82 | #### ✅ Verification
 83 | - **All 6 Methods Working** - Comprehensive testing confirms no more AttributeError crashes
 84 | - **29/29 Tests Passing** - Full test suite validates stability
 85 | - **MCP Tools Functional** - All 9 tools now work without hanging
 86 | - **Production Ready** - No more timeout issues or tool failures
 87 | 
 88 | #### 🚀 Performance Impact
 89 | - **Zero Performance Degradation** - Fixes maintain original performance characteristics
 90 | - **Improved Reliability** - Tools complete successfully instead of crashing
 91 | - **Better User Experience** - No more mysterious hangs or timeouts
 92 | 
 93 | ---
 94 | 
 95 | ## [1.2.0] - 2025-01-27
 96 | 
 97 | ### 🎯 Major Feature Release: Enhanced Tool Guidance & AI Optimization
 98 | 
 99 | This major release introduces **comprehensive tool usage guidance** inspired by Serena's approach, dramatically improving AI model effectiveness and user experience through rich descriptions, workflow recommendations, and best practices.
100 | 
101 | #### ✨ Added
102 | - **Comprehensive Usage Guide Tool** - New `get_usage_guide` tool provides complete guidance document with workflows, best practices, and examples
103 | - **Rich Tool Descriptions** - Enhanced all 8 tools with structured guidance using visual hierarchy (🎯 PURPOSE, 🔧 USAGE, ⚡ PERFORMANCE, 🔄 WORKFLOW, 💡 TIP)
104 | - **Performance-Aware Design** - Clear performance expectations for Fast (<3s), Moderate (3-15s), and Expensive (10-60s) operations
105 | - **Workflow Orchestration** - Optimal tool sequences for Code Exploration, Refactoring Analysis, and Architecture Analysis
106 | - **Visual Hierarchy** - Emoji-based categorization for quick scanning and improved readability
107 | 
108 | #### 🔧 Enhanced
109 | - **Tool Parameter Descriptions** - Enriched with usage context, constraints, and performance implications
110 | - **Best Practices Integration** - Embedded guidance on when and how to use each tool effectively
111 | - **Common Pitfalls Documentation** - Clear warnings about expensive operations and usage mistakes
112 | - **Use Case Examples** - Step-by-step workflows for common scenarios ("understand codebase", "refactor function X", "find code smells")
113 | 
114 | #### 🎯 AI Model Optimization
115 | - **Reduced Trial-and-Error** - Clear guidance prevents ineffective tool combinations
116 | - **Improved Tool Orchestration** - AI models understand optimal workflows and tool relationships
117 | - **Strategic Tool Usage** - Performance awareness leads to more efficient analysis patterns
118 | - **Context-Aware Recommendations** - Tools suggest when to use other tools for complete analysis
119 | 
120 | #### 📊 Workflow Patterns
121 | - **Foundation Tools** - `analyze_codebase` (required first), `project_statistics` (overview)
122 | - **Symbol Analysis** - `find_definition` → `find_references` → `find_callers`/`find_callees`
123 | - **Quality Analysis** - `complexity_analysis` + `dependency_analysis` for refactoring roadmaps
124 | - **Architecture Analysis** - `dependency_analysis` → `project_statistics` → `complexity_analysis`
125 | 
126 | #### 🚀 Performance Guidelines
127 | - **Fast Operations** - `find_definition`, `find_references`, `find_callers`, `find_callees`, `project_statistics` (use freely)
128 | - **Moderate Operations** - `complexity_analysis`, `dependency_analysis` (strategic use, cached results)
129 | - **Expensive Operations** - `analyze_codebase` (only when needed, results persist)
130 | 
131 | #### 💡 Innovation Beyond Industry Standards
132 | - **Visual Hierarchy** - Emoji-based categorization for instant comprehension
133 | - **Performance-First Design** - Speed expectations clearly marked for optimal usage
134 | - **Workflow-Centric Approach** - Emphasizes tool orchestration over individual tool usage
135 | - **Comprehensive Pitfall Prevention** - Proactive guidance to avoid common mistakes
136 | 
137 | #### 🛠️ Technical Implementation
138 | - **9 Enhanced Tools** - All tools now include comprehensive guidance
139 | - **Zero Performance Impact** - Guidance is descriptive metadata with no runtime overhead
140 | - **Production Ready** - All tests passing, zero linting errors
141 | - **Backward Compatible** - Existing tool functionality unchanged
142 | 
143 | #### 📚 Documentation Quality
144 | - **Professional Formatting** - Consistent structure across all tool descriptions
145 | - **Copy-Paste Ready** - All examples and workflows ready for immediate use
146 | - **Comprehensive Coverage** - Every tool includes purpose, usage, performance, workflow, and tips
147 | - **User-Centric Design** - Focused on practical guidance for real-world usage scenarios
148 | 
149 | ---
150 | 
151 | ## [1.1.1] - 2025-07-26
152 | 
153 | ### 📚 Documentation Release: Enhanced MCP Host Integration
154 | 
155 | This patch release updates documentation with comprehensive MCP host integration instructions and special recognition for Zencoder.
156 | 
157 | #### 📖 Enhanced
158 | - **Zencoder Integration** - Added special configuration for the best AI coding tool ⭐
159 | - **9+ MCP Hosts Supported** - Comprehensive setup instructions for all major MCP clients
160 | - **Enhanced Configuration** - Added file watcher options, environment variables, and troubleshooting
161 | - **Docker Integration** - Complete containerized deployment examples
162 | - **Professional Documentation** - Improved formatting and user experience
163 | 
164 | #### 🔧 MCP Hosts Added
165 | - **Claude Desktop** - CLI and manual configuration
166 | - **VS Code Extensions** - Cline, Continue, Cursor
167 | - **Editors** - Zed, Windsurf
168 | - **AI Assistants** - Zencoder ⭐, Aider, Open WebUI
169 | - **Generic MCP Client** - Universal configuration template
170 | 
171 | #### 🎯 User Experience
172 | - **Copy-Paste Ready** - All configuration examples ready to use
173 | - **Platform Aware** - OS-specific paths and commands
174 | - **Troubleshooting Guide** - Common issues and debug instructions
175 | - **File Watcher Documentation** - Complete v1.1.0 feature guide
176 | 
177 | ---
178 | 
179 | ## [1.1.0] - 2025-07-26
180 | 
181 | ### 🚀 Major Feature Release: Debounced File Watcher
182 | 
183 | This major release introduces **automatic file change detection** with intelligent debouncing, making the MCP server significantly more responsive and user-friendly for development workflows.
184 | 
185 | #### ✨ Added
186 | - **Debounced File Watcher** - Automatic detection of file changes with 2-second intelligent debouncing
187 | - **Real-time Graph Updates** - Code graph automatically updates when source files are modified
188 | - **Thread-Safe Architecture** - Watchdog observer with proper async/await coordination using `loop.call_soon_threadsafe()`
189 | - **Smart File Filtering** - Respects .gitignore patterns and only watches supported file extensions (25+ languages)
190 | - **Duplicate Change Prevention** - Recent changes tracking prevents redundant re-analysis
191 | 
192 | #### 🔧 Enhanced
193 | - **Cache Management Integration** - File watcher triggers comprehensive cache clearing before re-analysis
194 | - **Project Statistics** - Added file watcher status and statistics to project stats output
195 | - **Graceful Cleanup** - Proper file watcher shutdown and resource cleanup on server termination
196 | - **Error Recovery** - Robust error handling with fallback to manual analysis if watcher fails
197 | 
198 | #### ⚡ Performance Improvements
199 | - **Instant Response** - No more manual re-analysis needed when files change
200 | - **Efficient Batching** - Multiple rapid changes trigger only one re-analysis after debounce delay
201 | - **Resource Optimization** - Debouncing prevents CPU/memory spikes during bulk file operations
202 | - **Cache Efficiency** - Maintains 70%+ cache hit rates while ensuring data freshness
203 | 
204 | #### 🛠️ Technical Implementation
205 | - **Watchdog Integration** - Added `watchdog>=6.0.0` dependency for cross-platform file monitoring
206 | - **Event Loop Management** - Proper asyncio event loop handling between threads
207 | - **Debounce Logic** - Intelligent 2-second delay with change batching and duplicate filtering
208 | - **Memory Management** - Bounded cache sizes with automatic cleanup timers
209 | 
210 | #### 📊 Verification
211 | - **Comprehensive Testing** - Verified automatic re-analysis on file modifications
212 | - **Debounce Effectiveness** - Confirmed rapid changes are properly batched
213 | - **Thread Safety** - No race conditions between watcher thread and main event loop
214 | - **Resource Cleanup** - Proper shutdown prevents memory leaks and hanging processes
215 | 
216 | #### 🎯 User Experience
217 | - **Zero Configuration** - File watcher starts automatically after first analysis
218 | - **Development Friendly** - Perfect for active development with frequent file changes
219 | - **Production Ready** - Robust error handling and graceful degradation
220 | - **Status Visibility** - File watcher status included in project statistics
221 | 
222 | #### 📚 Documentation
223 | - **Comprehensive MCP Host Integration** - Added setup instructions for 9+ MCP hosts
224 | - **Zencoder Integration** - Special configuration for the best AI coding tool ⭐
225 | - **Enhanced README** - Docker, troubleshooting, and configuration options
226 | - **File Watcher Documentation** - Complete feature documentation and usage guide
227 | 
228 | ---
229 | 
230 | ## [1.0.9] - 2025-07-26
231 | 
232 | ### Symbol Search Fix Release
233 | 
234 | #### 🔧 Fixed
235 | - **Symbol Search Functionality** - Fixed critical bug where exact_match=True prevented partial symbol matching
236 | - **MCP Tool Responses** - All 8 MCP tools now properly find and return code symbols and definitions
237 | - **Search Coverage** - Symbol searches now find partial matches (e.g., "CodeGraph" finds "RustworkxCodeGraph")
238 | - **Function Discovery** - find_definition, find_references, find_callers, and find_callees now work correctly
239 | 
240 | #### 🚀 Performance
241 | - **Removed Analysis Caching** - Eliminated _is_analyzed flag that prevented fresh analysis on each request
242 | - **Real-time Analysis** - Each MCP tool call now performs fresh project analysis for accurate results
243 | - **Debug Logging** - Added comprehensive logging for troubleshooting file discovery and parsing
244 | 
245 | #### 📊 Verification
246 | - **Direct Testing** - Verified 20+ files parsed with 600+ nodes and 800+ relationships
247 | - **Symbol Coverage** - Confirmed detection of classes, functions, and modules across codebase
248 | - **Search Accuracy** - Multiple symbol searches now return expected results with proper file locations
249 | 
250 | ## [1.0.8] - 2025-07-26
251 | 
252 | ### Critical Performance and Reliability Fixes
253 | 
254 | #### 🔥 Critical Fixes
255 | - **File Discovery Performance** - Added comprehensive .gitignore pattern matching and common directory exclusion
256 | - **Tool Timeout Resolution** - Fixed 2+ minute timeouts by preventing analysis of massive REFERENCE directories
257 | - **Warning Spam Elimination** - Changed "Cannot add relationship: missing nodes" from WARNING to DEBUG level
258 | - **Clojure Language Removal** - Eliminated clojure support that was causing ast-grep crashes
259 | 
260 | #### ⚡ Performance Improvements  
261 | - **Directory Filtering** - Skip build/cache/dependency directories: __pycache__, node_modules, .git, dist, build
262 | - **Pattern Matching** - Efficient fnmatch-based .gitignore pattern implementation
263 | - **Response Times** - All 8 MCP tools now complete in under 30 seconds (previously 2+ minutes)
264 | 
265 | #### 🛠️ Technical Changes
266 | - **File Path Filtering** - Enhanced _should_ignore_path with comprehensive skip patterns
267 | - **Logging Levels** - Reduced noise by moving debug messages to appropriate log levels
268 | - **Error Handling** - Improved robustness for large codebases with proper timeout management
269 | 
270 | #### ✅ Verification
271 | - **8/8 Tools Working** - All MCP tools verified functional with proper response times
272 | - **No Timeouts** - Eliminated hanging and timeout issues completely
273 | - **Clean Output** - Removed warning spam for better user experience
274 | 
275 | ## [1.0.7] - 2025-07-25
276 | 
277 | ### Performance Optimization Release
278 | 
279 | #### ⚡ Enhanced
280 | - **Aggressive LRU Caching** - Implemented comprehensive caching across all performance-critical functions
281 | - **Memory Optimization** - Cache sizes optimized for 500+ file codebases with 500MB memory allocation
282 | - **Hashable Data Structures** - Made LanguageConfig frozen dataclass with tuple fields for cache compatibility
283 | - **Code Duplication Analysis** - Implemented actual duplicate code detection replacing placeholder
284 | 
285 | #### 🚀 Performance Improvements
286 | - **PageRank**: Up to 4.9M nodes/second processing speed
287 | - **Betweenness Centrality**: Up to 104K nodes/second processing speed
288 | - **Cache Effectiveness**: 50-90% speed improvements on repeated operations
289 | - **Sub-microsecond Response**: Cache hits deliver sub-microsecond response times
290 | 
291 | #### 🐛 Fixed
292 | - **Type Safety** - Resolved Pylance errors for LanguageConfig hashability
293 | - **Boolean Return Types** - Fixed type checking issues in line processing functions
294 | - **Graph Reconstruction** - Implemented complete fallback graph reconstruction from JSON data
295 | 
296 | #### 🧪 Technical Changes
297 | - Cache sizes: 300K for variable references, 200K for function calls, 100K for node lookups
298 | - Converted all LanguageConfig list fields to tuples for immutability and hashability
299 | - Added comprehensive performance benchmarks and cache effectiveness tests
300 | 
301 | ---
302 | 
303 | ## [1.0.8] - 2025-07-26
304 | 
305 | ### 🚀 Production Release: Performance & Reliability Fixes
306 | 
307 | This critical release resolves major performance and reliability issues that prevented proper tool functionality.
308 | 
309 | #### 🐛 Fixed
310 | - **Tool Timeout Issues** - Fixed 2+ minute timeouts by implementing proper .gitignore file filtering
311 | - **REFERENCE Directory Analysis** - Massive performance improvement by excluding reference materials from analysis
312 | - **Warning Spam** - Silenced hundreds of "missing nodes" warnings that cluttered output
313 | - **File Discovery** - Added comprehensive common directory exclusion (build/, dist/, node_modules/, etc.)
314 | 
315 | #### ⚡ Performance
316 | - **Dramatic Speed Improvement** - Tools now complete in 15-30 seconds instead of timing out
317 | - **Smart File Filtering** - Respects .gitignore patterns plus common build/cache directories
318 | - **Clean Output** - Eliminated debug warning spam for better user experience
319 | - **Memory Efficiency** - Reduced memory usage by skipping irrelevant files
320 | 
321 | #### ✅ Verified
322 | - **All 8 Tools Working** - Comprehensive test confirms 100% success rate (8/8 tools functional)
323 | - **Fast Analysis** - Complete project analysis in under 30 seconds
324 | - **Production Ready** - No more timeouts, crashes, or excessive warnings
325 | 
326 | #### 🛠️ Technical Changes
327 | - Added proper .gitignore pattern matching with fnmatch
328 | - Implemented common directory skip list (20+ patterns)
329 | - Changed relationship warnings from WARNING to DEBUG level
330 | - Enhanced file discovery with smart filtering
331 | 
332 | ---
333 | 
334 | ## [1.0.6] - 2025-07-25
335 | 
336 | ### 🛠️ Language Support Update: Clojure Removed
337 | 
338 | This release removes Clojure language support to resolve runtime crashes and ensures stable operation across all supported languages.
339 | 
340 | #### 🐛 Fixed
341 | - **Runtime Crash Fix** - Removed Clojure language configuration that was causing ast-grep panic crashes
342 | - **Server Stability** - All 8 MCP tools now function correctly without crash interruptions
343 | - **Project Analysis** - Server can now successfully analyze large codebases without language-related failures
344 | 
345 | #### ✅ Verified
346 | - **All Tools Working** - Comprehensive test confirms all 8 tools return meaningful data
347 | - **Performance Improved** - Analysis now completes successfully: 935 files parsed, 23,256 nodes, 22,321 relationships
348 | - **Production Ready** - No more runtime panics or tool execution failures
349 | 
350 | #### ⚡ Performance
351 | - **Language Count** - Now supports 25 languages (down from 26, Clojure removed)
352 | - **Parsing Speed** - Faster analysis without problematic language processing
353 | - **Memory Efficiency** - Reduced memory usage without Clojure AST overhead
354 | 
355 | ---
356 | 
357 | ## [1.0.5] - 2025-07-25
358 | 
359 | ### 🚀 Critical Fix: MCP Tool Exposure Resolved
360 | 
361 | This critical release fixes the MCP tool exposure issue that prevented tools from being accessible in Claude Code.
362 | 
363 | #### 🐛 Fixed
364 | - **CRITICAL MCP Tool Exposure** - Fixed issue where MCP tools were not properly accessible through Claude Code interface
365 | - **SDK Compliance** - Updated function signatures to match official Python SDK patterns exactly
366 | - **Type Annotations** - Changed `Dict[str, Any]` → `dict`, `List[types.TextContent]` → `list[types.TextContent]`
367 | - **Tool Dispatch** - Replaced complex handler dispatch with simple if/elif pattern following SDK examples
368 | 
369 | #### ✅ Verified
370 | - **Tool Accessibility** - All 8 tools now properly exposed and accessible: `claude mcp list` shows "✓ Connected"
371 | - **SDK Pattern Compliance** - Server implementation matches official Python SDK examples exactly
372 | - **Connectivity Testing** - Comprehensive test confirms "SUCCESS: MCP server is properly exposing 8 tools"
373 | 
374 | #### 📊 Added
375 | - **Connectivity Test Suite** - Added `test_mcp_connectivity.py` for MCP integration verification
376 | - **Comprehensive Test Report** - Added detailed `MCP_TOOLS_TEST_REPORT.md` with technical specifications
377 | - **Production Verification** - Confirmed all 8 tools working correctly in production environment
378 | 
379 | #### 🏗️ Technical Changes
380 | - Simplified `call_tool()` dispatch from dictionary pattern to direct if/elif structure
381 | - Updated all handler function signatures to use modern Python type hints
382 | - Maintained backward compatibility while fixing core functionality
383 | 
384 | ---
385 | 
386 | ## [1.0.4] - 2025-07-25
387 | 
388 | ### 🔧 Stability Release: MCP Server Integration Fixed
389 | 
390 | This critical release resolves MCP server integration issues and ensures reliable functionality.
391 | 
392 | #### 🐛 Fixed
393 | - **Import Issues** - Resolved relative import problems that prevented MCP server from loading in Claude Code
394 | - **Server Startup** - Fixed package execution environment compatibility issues
395 | - **MCP Integration** - Proper server initialization and protocol communication
396 | - **Development Installation** - Added editable package installation for proper module resolution
397 | 
398 | #### ✅ Verified
399 | - **Server Functionality** - Comprehensive test suite confirms all 8 MCP tools working correctly
400 | - **Command Execution** - Server starts properly with `code-graph-mcp --project-root .`
401 | - **Protocol Initialization** - MCP server initializes correctly with debug logging
402 | - **Package Installation** - Development mode installation resolves all import dependencies
403 | 
404 | #### 🧪 Testing
405 | - **Comprehensive Test Suite** - Added `test_mcp_server.py` for full MCP functionality validation
406 | - **Basic Functionality Test** - Added `simple_test.py` for core server verification
407 | - **Integration Validation** - Confirmed server works with proper package installation
408 | 
409 | ---
410 | 
411 | ## [1.0.3] - 2025-07-25
412 | 
413 | ### 📚 Documentation Release: Corrected Installation Commands
414 | 
415 | This patch release fixes critical documentation errors in installation commands.
416 | 
417 | #### 🐛 Fixed
418 | - **Installation Commands** - Removed non-existent `--project-root` flag from all documentation
419 | - **README.md** - Corrected MCP server installation instructions for both PyPI and source installations
420 | - **CHANGELOG.md** - Updated installation examples with accurate commands
421 | - **.mcp.json** - Fixed project configuration to use correct command syntax
422 | 
423 | #### 📖 Improved
424 | - **Accurate Documentation** - All installation commands now work correctly
425 | - **User Experience** - Eliminated confusion from incorrect command-line flags
426 | - **Professional Standards** - Documentation consistency across all files
427 | 
428 | ---
429 | 
430 | ## [1.0.2] - 2025-07-25
431 | 
432 | ### 🛠️ Professional Release: Open Source Ready
433 | 
434 | This maintenance release focuses on code quality, professional documentation, and open source preparation.
435 | 
436 | #### ✨ Added
437 | - **MIT License** - Open source license for commercial and personal use
438 | - **Professional Documentation** - Cleaned up comments and documentation for public release
439 | - **Enhanced Error Handling** - Improved logging and error messages across all components
440 | 
441 | #### 🐛 Fixed
442 | - **All Pylance Type Errors** - Resolved attribute access issues with UniversalNode structure
443 | - **Server.py Compatibility** - Fixed data structure alignment with universal graph components
444 | - **Professional Code Quality** - Removed development comments and improved documentation
445 | 
446 | #### 🚀 Improved
447 | - **Perfect Static Analysis** - Maintained 0 Ruff linting errors across all modules
448 | - **Enhanced Type Safety** - Proper attribute access patterns for UniversalNode
449 | - **Enterprise Standards** - Professional code quality suitable for open source distribution
450 | 
451 | ---
452 | 
453 | ## [1.0.1] - 2025-07-25
454 | 
455 | ### 🎯 Quality & Performance Release
456 | 
457 | Major code quality improvements and performance optimizations while maintaining full functionality.
458 | 
459 | #### 🐛 Fixed
460 | - **190+ Linting Errors** - Comprehensive cleanup across all source files
461 | - **Complex Function Refactoring** - Dictionary dispatch pattern for improved maintainability
462 | - **Import Optimization** - Cleaned up unused imports and improved module organization
463 | - **Type Annotation Issues** - Enhanced type hints for better IDE support
464 | 
465 | #### 🚀 Enhanced
466 | - **Perfect Code Quality** - Achieved 0 Ruff linting errors across entire codebase
467 | - **Enhanced Type Safety** - Proper null guards and exception handling
468 | - **Performance Optimizations** - Maintained 50-90% caching improvements
469 | - **Professional Standards** - Enterprise-grade error handling and defensive programming
470 | 
471 | ---
472 | 
473 | ## [1.0.0] - 2025-01-25
474 | 
475 | ### 🎉 Major Release: Multi-Language Support
476 | 
477 | This release transforms the code-graph-mcp from a Python-only analyzer to a comprehensive **25+ language code analysis platform**.
478 | 
479 | ### ✨ Added
480 | 
481 | #### Multi-Language Architecture
482 | - **Universal Parser** - ast-grep-powered parsing for 25+ programming languages
483 | - **Language-Agnostic Graph Structures** - Universal AST representation that works across all languages
484 | - **Intelligent Language Detection** - Multi-method detection (extension, MIME, shebang, content signatures)
485 | - **Cross-Language Analysis** - Code similarity, complexity, and pattern detection across language boundaries
486 | 
487 | #### Supported Languages (25+)
488 | - **Web & Frontend**: JavaScript, TypeScript, HTML, CSS
489 | - **Backend & Systems**: Python, Java, C#, C++, C, Rust, Go  
490 | - **JVM Languages**: Java, Kotlin, Scala
491 | - **Functional**: Elixir, Elm, Haskell, OCaml, F#
492 | - **Mobile**: Swift, Dart
493 | - **Scripting**: Ruby, PHP, Lua
494 | - **Data & Config**: SQL, YAML, JSON, TOML
495 | - **Markup & Docs**: XML, Markdown
496 | 
497 | #### Advanced Analysis Features
498 | - **Code Smell Detection** - Long functions, complex logic, duplicate patterns across languages
499 | - **Cross-Language Call Graphs** - Function relationships spanning multiple languages
500 | - **Circular Dependency Detection** - Import/dependency cycle analysis
501 | - **Maintainability Indexing** - Project health scoring with language-aware metrics
502 | - **Framework Recognition** - React, Angular, Vue, Django, Flask, Spring, and 15+ more
503 | 
504 | #### Project Intelligence
505 | - **Project Profiling** - Automatic detection of project type, build systems, CI configuration
506 | - **Multi-Language Statistics** - Comprehensive metrics across entire polyglot codebases
507 | - **Smart File Discovery** - Language-aware filtering with framework detection
508 | - **Parallel Processing** - Concurrent analysis of multi-language projects
509 | 
510 | ### 🚀 Enhanced
511 | 
512 | #### Performance Improvements
513 | - **Multi-Language AST Caching** - LRU caching with mtime invalidation across all languages
514 | - **Intelligent Routing** - Priority-based analysis with language-specific optimizations
515 | - **Memory Efficiency** - Universal graph structures with optimized storage
516 | 
517 | #### Enterprise Features
518 | - **Production Stability** - Comprehensive error handling across all language parsers
519 | - **Defensive Security** - Secure analysis without code execution
520 | - **Comprehensive Testing** - 14 test suites covering all major features
521 | - **10.00/10 Pylint Score** - Maintained code quality standards
522 | 
523 | ### 🔄 Changed
524 | 
525 | #### Breaking Changes
526 | - Minimum Python version remains 3.12+
527 | - New dependency: `ast-grep-py>=0.39.0` for multi-language parsing
528 | - Enhanced MCP tools now return language-aware results
529 | 
530 | #### API Evolution
531 | - All existing MCP tools (`analyze_codebase`, `find_definition`, etc.) now work across all 25+ languages
532 | - Universal node types replace Python-specific AST structures
533 | - Language detection integrated into all analysis workflows
534 | 
535 | ### 📦 Dependencies
536 | 
537 | #### New Requirements
538 | - `ast-grep-py>=0.39.0` - Multi-language parsing backend
539 | - Enhanced MCP protocol support for cross-language analysis
540 | 
541 | #### Development Dependencies
542 | - `pytest>=7.0.0` with multi-language test fixtures
543 | - `black>=23.0.0` and `ruff>=0.1.0` for code quality
544 | 
545 | ### 🧪 Testing
546 | 
547 | - **Comprehensive Test Suite** - 14 tests covering all major features
548 | - **Multi-Language Integration Tests** - End-to-end validation of parsing pipeline
549 | - **Language Registry Tests** - Verification of all 25+ language configurations
550 | - **Performance Benchmarks** - Cross-language analysis performance validation
551 | 
552 | ### 📚 Documentation
553 | 
554 | - **Updated README** - Complete multi-language feature documentation
555 | - **Enhanced Installation Guide** - PyPI and source installation with ast-grep-py
556 | - **Usage Examples** - Real-world multi-language project analysis scenarios
557 | - **Language Support Matrix** - Detailed breakdown of all supported languages
558 | 
559 | ### 🎯 Migration Guide
560 | 
561 | #### For Existing Users
562 | The v1.0.0 release is backward compatible - all existing functionality continues to work exactly as before, but now with enhanced multi-language capabilities.
563 | 
564 | #### New Installation
565 | ```bash
566 | pip install code-graph-mcp  # Now automatically includes ast-grep-py
567 | claude mcp add --scope project code-graph-mcp "uv run code-graph-mcp --verbose"
568 | ```
569 | 
570 | #### Enhanced Features
571 | - Same MCP tools, now work with JavaScript, TypeScript, Java, Rust, Go, and 20+ more languages
572 | - Automatic language detection - no configuration needed
573 | - Cross-language analysis - find relationships between Python APIs and React components
574 | 
575 | ---
576 | 
577 | ## [0.1.0] - 2025-01-20
578 | 
579 | ### Initial Release
580 | - Python-only code analysis
581 | - 8 MCP analysis tools
582 | - AST parsing with caching
583 | - Basic complexity analysis
584 | - MCP protocol integration


--------------------------------------------------------------------------------