├── .codestateignore ├── .python-version ├── tests ├── .gitignore ├── __init__.py ├── test_diagnostics │ ├── __init__.py │ ├── test_ast.py │ ├── test_language_registry.py │ └── test_language_pack.py ├── conftest.py ├── test_di.py ├── test_persistent_server.py ├── test_basic.py ├── test_language_listing.py ├── test_ast_cursor.py ├── test_cli_arguments.py ├── test_makefile_targets.py ├── test_env_config.py ├── test_config_manager.py ├── test_logging_early_init.py ├── test_logging_bootstrap.py ├── test_project_persistence.py └── test_logging_config.py ├── src └── mcp_server_tree_sitter │ ├── tools │ ├── __init__.py │ ├── debug.py │ ├── project.py │ ├── ast_operations.py │ └── query_builder.py │ ├── models │ ├── __init__.py │ └── ast_cursor.py │ ├── prompts │ └── __init__.py │ ├── cache │ └── __init__.py │ ├── utils │ ├── __init__.py │ ├── context │ │ ├── __init__.py │ │ └── mcp_context.py │ ├── security.py │ ├── path.py │ ├── file_io.py │ └── tree_sitter_types.py │ ├── language │ ├── __init__.py │ ├── templates │ │ ├── __init__.py │ │ ├── apl.py │ │ ├── c.py │ │ ├── javascript.py │ │ ├── python.py │ │ ├── rust.py │ │ ├── go.py │ │ ├── cpp.py │ │ ├── java.py │ │ ├── julia.py │ │ ├── swift.py │ │ ├── typescript.py │ │ └── kotlin.py │ └── query_templates.py │ ├── capabilities │ ├── __init__.py │ └── server_capabilities.py │ ├── testing │ └── __init__.py │ ├── bootstrap │ ├── __init__.py │ └── logging_bootstrap.py │ ├── __init__.py │ ├── logging_config.py │ ├── exceptions.py │ ├── di.py │ ├── __main__.py │ ├── api.py │ ├── context.py │ └── server.py ├── LICENSE ├── scripts └── implementation-search.sh ├── NOTICE ├── .github └── workflows │ ├── release.yml │ └── ci.yml ├── pyproject.toml ├── docs ├── cli.md ├── tree-sitter-type-safety.md └── architecture.md ├── CONTRIBUTING.md ├── .gitignore ├── TODO.md └── ROADMAP.md /.codestateignore: -------------------------------------------------------------------------------- 1 | uv.lock 2 | 
-------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /tests/.gitignore: -------------------------------------------------------------------------------- 1 | # Reports 2 | *.json 3 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test package for mcp-server-tree-sitter.""" 2 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/tools/__init__.py: -------------------------------------------------------------------------------- 1 | """MCP tool components.""" 2 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/models/__init__.py: -------------------------------------------------------------------------------- 1 | """Data models for MCP server.""" 2 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/prompts/__init__.py: -------------------------------------------------------------------------------- 1 | """MCP prompt components.""" 2 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/cache/__init__.py: -------------------------------------------------------------------------------- 1 | """Cache components for MCP server.""" 2 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility functions for MCP server.""" 2 | -------------------------------------------------------------------------------- 
/src/mcp_server_tree_sitter/language/__init__.py: -------------------------------------------------------------------------------- 1 | """Language handling components for MCP server.""" 2 | -------------------------------------------------------------------------------- /tests/test_diagnostics/__init__.py: -------------------------------------------------------------------------------- 1 | """Pytest-based diagnostic tests for mcp-server-tree-sitter.""" 2 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/capabilities/__init__.py: -------------------------------------------------------------------------------- 1 | """MCP capability declarations.""" 2 | 3 | from .server_capabilities import register_capabilities 4 | 5 | __all__ = ["register_capabilities"] 6 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/testing/__init__.py: -------------------------------------------------------------------------------- 1 | """Testing utilities for mcp-server-tree-sitter.""" 2 | 3 | from .pytest_diagnostic import DiagnosticData, diagnostic 4 | 5 | __all__ = ["DiagnosticData", "diagnostic"] 6 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/context/__init__.py: -------------------------------------------------------------------------------- 1 | """Context handling utilities for MCP operations.""" 2 | 3 | from .mcp_context import MCPContext, ProgressScope 4 | 5 | __all__ = ["MCPContext", "ProgressScope"] 6 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/bootstrap/__init__.py: -------------------------------------------------------------------------------- 1 | """Bootstrap package for early initialization dependencies. 
2 | 3 | This package contains modules that should be imported and initialized before 4 | any other modules in the project to ensure proper setup of core services. 5 | """ 6 | 7 | # Import logging bootstrap module to ensure it's available 8 | from . import logging_bootstrap 9 | 10 | # Export key functions for convenience 11 | from .logging_bootstrap import get_log_level_from_env, get_logger, update_log_levels 12 | 13 | __all__ = ["get_logger", "update_log_levels", "get_log_level_from_env", "logging_bootstrap"] 14 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/__init__.py: -------------------------------------------------------------------------------- 1 | """MCP Server for Tree-sitter - Code analysis capabilities using tree-sitter. 2 | 3 | This module provides a Model Context Protocol server that gives LLMs like Claude 4 | intelligent access to codebases with appropriate context management. 5 | """ 6 | 7 | # Import bootstrap package first to ensure core services are set up 8 | # before any other modules are imported 9 | from . import bootstrap as bootstrap # noqa: F401 - Import needed for initialization 10 | 11 | # Logging is now configured via the bootstrap.logging_bootstrap module 12 | # The bootstrap module automatically calls configure_root_logger() when imported 13 | 14 | __version__ = "0.1.0" 15 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/__init__.py: -------------------------------------------------------------------------------- 1 | """Language-specific query templates collection.""" 2 | 3 | from typing import Dict 4 | 5 | from . 
import ( 6 | apl, 7 | c, 8 | cpp, 9 | go, 10 | java, 11 | javascript, 12 | julia, 13 | kotlin, 14 | python, 15 | rust, 16 | swift, 17 | typescript, 18 | ) 19 | 20 | # Combine all language templates 21 | QUERY_TEMPLATES: Dict[str, Dict[str, str]] = { 22 | "python": python.TEMPLATES, 23 | "javascript": javascript.TEMPLATES, 24 | "typescript": typescript.TEMPLATES, 25 | "go": go.TEMPLATES, 26 | "rust": rust.TEMPLATES, 27 | "c": c.TEMPLATES, 28 | "cpp": cpp.TEMPLATES, 29 | "swift": swift.TEMPLATES, 30 | "java": java.TEMPLATES, 31 | "kotlin": kotlin.TEMPLATES, 32 | "julia": julia.TEMPLATES, 33 | "apl": apl.TEMPLATES, 34 | } 35 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/apl.py: -------------------------------------------------------------------------------- 1 | """Query templates for APL language.""" 2 | 3 | TEMPLATES = { 4 | "functions": """ 5 | (function_definition 6 | name: (identifier) @function.name 7 | body: (block) @function.body) @function.def 8 | """, 9 | "namespaces": """ 10 | (namespace_declaration 11 | name: (identifier) @namespace.name) @namespace.def 12 | """, 13 | "variables": """ 14 | (assignment 15 | left: (identifier) @variable.name) @variable.def 16 | """, 17 | "imports": """ 18 | (import_statement 19 | module: (identifier) @import.module) @import 20 | """, 21 | "operators": """ 22 | (operator_definition 23 | operator: (_) @operator.sym 24 | body: (block) @operator.body) @operator.def 25 | """, 26 | "classes": """ 27 | (class_definition 28 | name: (identifier) @class.name 29 | body: (block) @class.body) @class.def 30 | """, 31 | } 32 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/logging_config.py: -------------------------------------------------------------------------------- 1 | """Logging configuration for MCP Tree-sitter Server. 2 | 3 | This module is maintained for backwards compatibility. 
4 | All functionality has been moved to the bootstrap.logging_bootstrap module, 5 | which is the canonical source for logging configuration. 6 | 7 | All imports from this module should be updated to use: 8 | from mcp_server_tree_sitter.bootstrap import get_logger, update_log_levels 9 | """ 10 | 11 | # Import the bootstrap module's logging components to maintain backwards compatibility 12 | from .bootstrap.logging_bootstrap import ( 13 | LOG_LEVEL_MAP, 14 | configure_root_logger, 15 | get_log_level_from_env, 16 | get_logger, 17 | update_log_levels, 18 | ) 19 | 20 | # Re-export all the functions and constants for backwards compatibility 21 | __all__ = ["LOG_LEVEL_MAP", "configure_root_logger", "get_log_level_from_env", "get_logger", "update_log_levels"] 22 | 23 | # The bootstrap module already calls configure_root_logger() when imported, 24 | # so we don't need to call it again here. 25 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Pytest configuration for mcp-server-tree-sitter tests.""" 2 | 3 | import pytest 4 | 5 | # Import and register the diagnostic plugin 6 | pytest_plugins = ["mcp_server_tree_sitter.testing.pytest_diagnostic"] 7 | 8 | 9 | @pytest.fixture(autouse=True, scope="function") 10 | def reset_project_registry(): 11 | """Reset the project registry between tests. 12 | 13 | This prevents tests from interfering with each other when using the 14 | project registry, which is a singleton that persists across tests. 
def get_query_template(language: str, template_name: str) -> Optional[str]:
    """
    Look up one named query template for a language.

    Args:
        language: Language identifier (a key of QUERY_TEMPLATES)
        template_name: Name of the template within that language

    Returns:
        The query string, or None when the language is unknown, has no
        templates, or does not define the requested template
    """
    templates_for_language = QUERY_TEMPLATES.get(language)
    # Guard clause: a missing (or empty) language entry yields no template.
    if not templates_for_language:
        return None
    return templates_for_language.get(template_name)


def list_query_templates(language: Optional[str] = None) -> Dict[str, Any]:
    """
    Enumerate the available query templates.

    Args:
        language: Optional language to restrict the listing to

    Returns:
        The full QUERY_TEMPLATES mapping when no language is given;
        otherwise a single-entry dictionary mapping the language to its
        templates (an empty dictionary when the language is unknown)
    """
    # A falsy language (None or "") means "no filter".
    if not language:
        return QUERY_TEMPLATES
    return {language: QUERY_TEMPLATES.get(language, {})}
#!/bin/bash
# implementation-search.sh - Script to spot check implementation patterns
#
# Usage: implementation-search.sh <search-term>
#
# Recursively greps the current directory for <search-term>, skipping
# virtualenv, VCS and cache directories plus a few noisy files.
# Exit status is grep's own: 0 on a match, 1 when nothing matched.

# Enable strict mode
set -euo pipefail

# Check if search term is provided
if [ $# -eq 0 ]; then
    echo "Usage: $0 <search-term>" >&2
    exit 1
fi

# Directories to exclude.
# When searching recursively, grep compares --exclude-dir globs against each
# directory's BASE NAME, so path-style entries such as "./.pytest_cache" or
# "./src/*/__pycache__" never match. Plain base names are used instead; a
# single "__pycache__" entry covers every nesting level.
EXCLUDE_DIRS=(
    ".venv"
    ".git"
    "diagnostic_results"
    ".pytest_cache"
    ".ruff_cache"
    ".mypy_cache"
    "__pycache__"
)

# Files to exclude (also matched by base name, so these apply at any depth)
EXCLUDE_FILES=(
    ".gitignore"
    "TODO.md"
    "FEATURES.md"
)

# Build the grep options as an array so that every option stays a single
# word and is never subject to word splitting or pathname expansion.
GREP_ARGS=(--binary-files=without-match)
for dir in "${EXCLUDE_DIRS[@]}"; do
    GREP_ARGS+=("--exclude-dir=${dir}")
done

for file in "${EXCLUDE_FILES[@]}"; do
    GREP_ARGS+=("--exclude=${file}")
done

# Run grep with all exclusions.
# -e keeps a search term that starts with "-" from being parsed as an option.
grep -r "${GREP_ARGS[@]}" -e "${1}" -- .
"""Query templates for JavaScript."""

# Maps a template name to a tree-sitter query (S-expression syntax).
# Capture names (@function.name, @import.source, ...) are the public contract
# of each template; callers select captured nodes by these names.
TEMPLATES = {
    "functions": """
        (function_declaration
            name: (identifier) @function.name
            parameters: (formal_parameters) @function.params
            body: (statement_block) @function.body) @function.def

        (arrow_function
            parameters: (formal_parameters) @function.params
            body: (_) @function.body) @function.def
    """,
    "classes": """
        (class_declaration
            name: (identifier) @class.name
            body: (class_body) @class.body) @class.def
    """,
    # In the JavaScript grammar the imported bindings live in an unnamed
    # `import_clause` child that precedes the `source` field; there is no
    # `specifier` field on import_statement. Sibling patterns must follow
    # document order, so the clause is listed before the source.
    "imports": """
        (import_statement) @import

        (import_statement
            (import_clause) @import.specifier
            source: (string) @import.source) @import.full
    """,
    "function_calls": """
        (call_expression
            function: (identifier) @call.function
            arguments: (arguments) @call.args) @call
    """,
    "assignments": """
        (variable_declarator
            name: (_) @assign.target
            value: (_) @assign.value) @assign
    """,
}
class MCPTreeSitterError(Exception):
    """Root of the mcp-server-tree-sitter exception hierarchy."""


class LanguageError(MCPTreeSitterError):
    """Problem concerning a tree-sitter language."""


class LanguageNotFoundError(LanguageError):
    """No parser is available for the requested language."""


class LanguageInstallError(LanguageError):
    """Installing a language failed."""


class ParsingError(MCPTreeSitterError):
    """Failure while parsing."""


class ProjectError(MCPTreeSitterError):
    """Problem concerning project management."""


class FileAccessError(MCPTreeSitterError):
    """Failure while accessing a project file."""


class QueryError(MCPTreeSitterError):
    """Problem concerning a tree-sitter query."""


class SecurityError(MCPTreeSitterError):
    """Security-related failure."""


class CacheError(MCPTreeSitterError):
    """Problem concerning caching."""
@import.item) @import 22 | 23 | ;; Handle aliased imports with 'as' keyword 24 | (import_from_statement 25 | module_name: (dotted_name) @import.from 26 | name: (aliased_import 27 | name: (dotted_name) @import.item 28 | alias: (identifier) @import.alias)) @import 29 | """, 30 | "function_calls": """ 31 | (call 32 | function: (identifier) @call.function 33 | arguments: (argument_list) @call.args) @call 34 | """, 35 | "assignments": """ 36 | (assignment 37 | left: (_) @assign.target 38 | right: (_) @assign.value) @assign 39 | """, 40 | } 41 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/rust.py: -------------------------------------------------------------------------------- 1 | """Query templates for Rust.""" 2 | 3 | TEMPLATES = { 4 | "functions": """ 5 | (function_item 6 | name: (identifier) @function.name 7 | parameters: (parameters) @function.params 8 | body: (block) @function.body) @function.def 9 | """, 10 | "structs": """ 11 | (struct_item 12 | name: (type_identifier) @struct.name 13 | body: (field_declaration_list) @struct.body) @struct.def 14 | """, 15 | "enums": """ 16 | (enum_item 17 | name: (type_identifier) @enum.name 18 | body: (enum_variant_list) @enum.body) @enum.def 19 | """, 20 | "imports": """ 21 | (use_declaration) @import 22 | 23 | (use_declaration 24 | (identifier) @import.name) @import.direct 25 | 26 | (use_declaration 27 | (scoped_identifier 28 | path: (_) @import.path 29 | name: (identifier) @import.name)) @import.scoped 30 | 31 | (use_declaration 32 | (scoped_use_list 33 | path: (_) @import.path)) @import.list 34 | """, 35 | "traits": """ 36 | (trait_item 37 | name: (type_identifier) @trait.name) @trait.def 38 | """, 39 | "impls": """ 40 | (impl_item 41 | trait: (_)? 
@impl.trait 42 | type: (_) @impl.type) @impl.def 43 | """, 44 | } 45 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/go.py: -------------------------------------------------------------------------------- 1 | """Query templates for Go.""" 2 | 3 | TEMPLATES = { 4 | "functions": """ 5 | (function_declaration 6 | name: (identifier) @function.name 7 | parameters: (parameter_list) @function.params 8 | body: (block) @function.body) @function.def 9 | 10 | (method_declaration 11 | name: (field_identifier) @method.name 12 | parameters: (parameter_list) @method.params 13 | body: (block) @method.body) @method.def 14 | """, 15 | "structs": """ 16 | (type_declaration 17 | (type_spec 18 | name: (type_identifier) @struct.name 19 | type: (struct_type) @struct.body)) @struct.def 20 | 21 | (type_declaration 22 | (type_spec 23 | name: (type_identifier) @type.name 24 | type: (_) @type.body)) @type.def 25 | """, 26 | "imports": """ 27 | (import_declaration) @import 28 | 29 | (import_declaration 30 | (import_spec_list 31 | (import_spec) @import.spec)) @import.list 32 | 33 | (import_declaration 34 | (import_spec_list 35 | (import_spec 36 | path: (_) @import.path))) @import.path_list 37 | 38 | (import_declaration 39 | (import_spec 40 | path: (_) @import.path)) @import.single 41 | """, 42 | "interfaces": """ 43 | (type_declaration 44 | (type_spec 45 | name: (type_identifier) @interface.name 46 | type: (interface_type) @interface.body)) @interface.def 47 | """, 48 | } 49 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/cpp.py: -------------------------------------------------------------------------------- 1 | """Query templates for C++ language.""" 2 | 3 | TEMPLATES = { 4 | "functions": """ 5 | (function_definition 6 | declarator: (function_declarator 7 | declarator: (identifier) @function.name)) @function.def 8 | 9 | 
"""Query templates for Java language."""

# Maps a template name to a tree-sitter query (S-expression syntax).
# Capture names (@function.name, @import.name, ...) are the public contract
# of each template and are kept stable.
TEMPLATES = {
    # Constructors carry a `constructor_body`, not a plain `block`.
    "functions": """
        (method_declaration
            name: (identifier) @function.name
            parameters: (formal_parameters) @function.params
            body: (block) @function.body) @function.def

        (constructor_declaration
            name: (identifier) @constructor.name
            parameters: (formal_parameters) @constructor.params
            body: (constructor_body) @constructor.body) @constructor.def
    """,
    "classes": """
        (class_declaration
            name: (identifier) @class.name
            body: (class_body) @class.body) @class.def
    """,
    # An interface's body node is `interface_body`; `class_body` only occurs
    # under class declarations.
    "interfaces": """
        (interface_declaration
            name: (identifier) @interface.name
            body: (interface_body) @interface.body) @interface.def
    """,
    # `import_declaration` exposes no named fields: the imported name is a
    # plain `scoped_identifier` child and a wildcard is an `asterisk` child.
    "imports": """
        (import_declaration) @import

        (import_declaration
            (scoped_identifier) @import.name) @import.qualified

        (import_declaration
            (scoped_identifier
                name: (identifier) @import.class)) @import.class

        (import_declaration
            (asterisk)) @import.wildcard
    """,
    "annotations": """
        (annotation
            name: (identifier) @annotation.name) @annotation

        (annotation_type_declaration
            name: (identifier) @annotation.type_name) @annotation.type
    """,
    "enums": """
        (enum_declaration
            name: (identifier) @enum.name
            body: (enum_body) @enum.body) @enum.def
    """,
}
@import.name) @import.simple 35 | 36 | (using_statement) @using 37 | 38 | (using_statement 39 | name: (identifier) @using.name) @using.simple 40 | 41 | (import_statement 42 | name: (dot_expression) @import.qualified) @import.qualified 43 | """, 44 | "macros": """ 45 | (macro_definition 46 | name: (identifier) @macro.name 47 | body: (block) @macro.body) @macro.def 48 | """, 49 | "abstractTypes": """ 50 | (abstract_definition 51 | name: (identifier) @abstract.name) @abstract.def 52 | """, 53 | } 54 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/language/templates/swift.py: -------------------------------------------------------------------------------- 1 | """Query templates for Swift language.""" 2 | 3 | TEMPLATES = { 4 | "functions": """ 5 | (function_declaration 6 | name: (identifier) @function.name) @function.def 7 | 8 | (function_declaration 9 | name: (identifier) @function.name 10 | body: (code_block) @function.body) @function.def 11 | """, 12 | "classes": """ 13 | (class_declaration 14 | name: (type_identifier) @class.name) @class.def 15 | 16 | (class_declaration 17 | name: (type_identifier) @class.name 18 | body: (class_body) @class.body) @class.def 19 | """, 20 | "structs": """ 21 | (struct_declaration 22 | name: (type_identifier) @struct.name) @struct.def 23 | 24 | (struct_declaration 25 | name: (type_identifier) @struct.name 26 | body: (struct_body) @struct.body) @struct.def 27 | """, 28 | "imports": """ 29 | (import_declaration) @import 30 | 31 | (import_declaration 32 | path: (identifier) @import.path) @import.simple 33 | 34 | (import_declaration 35 | path: (_) @import.path) @import.complex 36 | """, 37 | "protocols": """ 38 | (protocol_declaration 39 | name: (type_identifier) @protocol.name) @protocol.def 40 | 41 | (protocol_declaration 42 | name: (type_identifier) @protocol.name 43 | body: (protocol_body) @protocol.body) @protocol.def 44 | """, 45 | "extensions": """ 46 | 
# Tree-sitter query templates for TypeScript, keyed by template name.
#
# Each value is tree-sitter query source; the `@capture.names` label the nodes
# a pattern matches.
# NOTE(review): the `specifier:` field used in the import patterns and the
# `(object_type)` node used as the interface body have not been checked against
# the TypeScript grammar version that is actually loaded -- confirm before
# relying on those patterns.
TEMPLATES = {
    # Function declarations, arrow functions and class methods. Arrow functions
    # reuse the @function.* captures but have no name capture.
    "functions": """
        (function_declaration
            name: (identifier) @function.name
            parameters: (formal_parameters) @function.params
            body: (statement_block) @function.body) @function.def

        (arrow_function
            parameters: (formal_parameters) @function.params
            body: (_) @function.body) @function.def

        (method_definition
            name: (property_identifier) @method.name
            parameters: (formal_parameters) @method.params
            body: (statement_block) @method.body) @method.def
    """,
    "classes": """
        (class_declaration
            name: (type_identifier) @class.name
            body: (class_body) @class.body) @class.def
    """,
    # Interfaces plus type aliases (aliases are captured under @alias.*).
    "interfaces": """
        (interface_declaration
            name: (type_identifier) @interface.name
            body: (object_type) @interface.body) @interface.def

        (type_alias_declaration
            name: (type_identifier) @alias.name
            value: (_) @alias.value) @alias.def
    """,
    # Four patterns of increasing specificity: any import, its source string,
    # named import specifiers, and namespace imports.
    "imports": """
        (import_statement) @import

        (import_statement
            source: (string) @import.source) @import.source_only

        (import_statement
            source: (string) @import.source
            specifier: (named_imports
                (import_specifier
                    name: (identifier) @import.name))) @import.named

        (import_statement
            source: (string) @import.source
            specifier: (namespace_import
                name: (identifier) @import.namespace)) @import.namespace
    """,
}
# Tree-sitter query templates for Kotlin, keyed by template name.
#
# Each value is tree-sitter query source; the `@capture.names` label the nodes
# a pattern matches. Most templates pair a minimal pattern (name only) with a
# more specific one that also captures the body.
# NOTE(review): every `field:` prefix below (`name:`, `function_body:`,
# `class_body:`, `identifier:`, `import_alias:`, `variable_declaration:`,
# `simple_identifier:`, `type:`) must exist as a field in the Kotlin grammar
# that is loaded at runtime -- this has not been verified here; confirm.
TEMPLATES = {
    "functions": """
        (function_declaration
            name: (simple_identifier) @function.name) @function.def

        (function_declaration
            name: (simple_identifier) @function.name
            function_body: (function_body) @function.body) @function.def
    """,
    "classes": """
        (class_declaration
            name: (simple_identifier) @class.name) @class.def

        (class_declaration
            name: (simple_identifier) @class.name
            class_body: (class_body) @class.body) @class.def
    """,
    "interfaces": """
        (interface_declaration
            name: (simple_identifier) @interface.name) @interface.def

        (interface_declaration
            name: (simple_identifier) @interface.name
            class_body: (class_body) @interface.body) @interface.def
    """,
    # Any import header, then simple, dot-qualified and aliased forms.
    "imports": """
        (import_header) @import

        (import_header
            identifier: (identifier) @import.id) @import.simple

        (import_header
            identifier: (dot_qualified_expression) @import.qualified) @import.qualified

        (import_header
            import_alias: (import_alias
                name: (simple_identifier) @import.alias)) @import.aliased
    """,
    "properties": """
        (property_declaration
            variable_declaration: (variable_declaration
                simple_identifier: (simple_identifier) @property.name)) @property.def
    """,
    # Class declarations carrying the literal "data" modifier.
    "dataClasses": """
        (class_declaration
            type: (type_modifiers
                (type_modifier
                    "data" @data_class.modifier))
            name: (simple_identifier) @data_class.name) @data_class.def
    """,
}
Tree-sitter Server 2 | Copyright (c) 2025 Wrale 3 | Licensed under the MIT License (see LICENSE file) 4 | 5 | This software includes or depends upon the following third-party components: 6 | 7 | -------------------------------------------------- 8 | tree-sitter 9 | -------------------------------------------------- 10 | https://github.com/tree-sitter/tree-sitter 11 | Copyright (c) 2018-2024 Max Brunsfeld 12 | Licensed under the MIT License 13 | 14 | -------------------------------------------------- 15 | tree-sitter-language-pack 16 | -------------------------------------------------- 17 | https://github.com/Goldziher/tree-sitter-language-pack 18 | 19 | Dual licensed: 20 | 1. MIT License 21 | Copyright (c) 2024-2025 Na'aman Hirschfeld 22 | 23 | 2. Apache License 2.0 24 | Copyright (c) 2022 Grant Jenks 25 | As a fork of tree-sitter-languages 26 | 27 | tree-sitter-language-pack bundles numerous tree-sitter language parsers, 28 | each with their own licenses (all permissive: MIT, Apache 2.0, etc.). 29 | See the tree-sitter-language-pack repository for details on individual language parsers. 30 | 31 | -------------------------------------------------- 32 | Python Dependencies 33 | -------------------------------------------------- 34 | - mcp: Model Context Protocol implementation 35 | - pydantic: Data validation library 36 | - pyyaml: YAML parsing library 37 | 38 | All Python dependencies are used in accordance with their respective licenses. 39 | 40 | -------------------------------------------------- 41 | Note on Language Grammars 42 | -------------------------------------------------- 43 | When using tree-sitter-language-pack, this project indirectly incorporates 44 | numerous tree-sitter language grammars. As noted in tree-sitter-language-pack's 45 | documentation, all bundled grammars are under permissive open-source licenses 46 | (MIT, Apache 2.0, etc.) and no GPL-licensed grammars are included. 
47 | 48 | For a complete list of included grammars and their specific licenses, please refer to: 49 | https://github.com/Goldziher/tree-sitter-language-pack#available-languages 50 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/di.py: -------------------------------------------------------------------------------- 1 | """Dependency injection container for MCP Tree-sitter Server. 2 | 3 | This module provides a central container for managing all application dependencies, 4 | replacing the global variables and singletons previously used throughout the codebase. 5 | """ 6 | 7 | from typing import Any, Dict 8 | 9 | # Import logging from bootstrap package 10 | from .bootstrap import get_logger 11 | from .cache.parser_cache import TreeCache 12 | from .config import ConfigurationManager, ServerConfig 13 | from .language.registry import LanguageRegistry 14 | from .models.project import ProjectRegistry 15 | 16 | logger = get_logger(__name__) 17 | 18 | 19 | class DependencyContainer: 20 | """Container for all application dependencies.""" 21 | 22 | def __init__(self) -> None: 23 | """Initialize container with all core dependencies.""" 24 | logger.debug("Initializing dependency container") 25 | 26 | # Create core dependencies 27 | self.config_manager = ConfigurationManager() 28 | self._config = self.config_manager.get_config() 29 | self.project_registry = ProjectRegistry() 30 | self.language_registry = LanguageRegistry() 31 | self.tree_cache = TreeCache( 32 | max_size_mb=self._config.cache.max_size_mb, ttl_seconds=self._config.cache.ttl_seconds 33 | ) 34 | 35 | # Storage for any additional dependencies 36 | self._additional: Dict[str, Any] = {} 37 | 38 | def get_config(self) -> ServerConfig: 39 | """Get the current configuration.""" 40 | # Always get the latest from the config manager 41 | config = self.config_manager.get_config() 42 | return config 43 | 44 | def register_dependency(self, name: str, instance: Any) 
def validate_file_access(file_path: Union[str, Path], project_root: Union[str, Path]) -> None:
    """
    Validate a file can be safely accessed.

    The checks run in this order and the first failure raises:
    containment in the project root, excluded directories, allowed
    extensions, and (for existing regular files) maximum size.

    Args:
        file_path: Path to validate
        project_root: Project root directory

    Raises:
        SecurityError: If path fails validation
    """
    # Always get a fresh config for each validation
    config = get_config()
    logger = logging.getLogger(__name__)

    path_obj = Path(file_path)
    root_obj = Path(project_root)

    # Normalize paths to prevent directory traversal
    try:
        normalized_path = path_obj.resolve()
        normalized_root = root_obj.resolve()
    except (ValueError, OSError) as e:
        raise SecurityError(f"Invalid path: {e}") from e

    # Check if path is inside project root.
    # This must compare whole path components: a plain string-prefix test
    # would accept a sibling such as "/srv/project-evil" for root "/srv/project".
    try:
        normalized_path.relative_to(normalized_root)
    except ValueError:
        raise SecurityError(f"Access denied: {file_path} is outside project root") from None

    # Check excluded directories (matched against every component of the resolved path)
    for excluded in config.security.excluded_dirs:
        if excluded in normalized_path.parts:
            raise SecurityError(f"Access denied to excluded directory: {excluded}")

    # Check file extension if restriction is enabled; the suffix is compared
    # without its leading dot and case-insensitively
    allowed_extensions = config.security.allowed_extensions
    if allowed_extensions and path_obj.suffix.lower()[1:] not in allowed_extensions:
        raise SecurityError(f"File type not allowed: {path_obj.suffix}")

    # Check file size if it exists
    if normalized_path.exists() and normalized_path.is_file():
        file_size_mb = normalized_path.stat().st_size / (1024 * 1024)
        max_file_size_mb = config.security.max_file_size_mb
        logger.debug(f"File size check: {file_size_mb:.2f}MB, limit: {max_file_size_mb}MB")
        if file_size_mb > max_file_size_mb:
            raise SecurityError(f"File too large: {file_size_mb:.2f}MB exceeds limit of {max_file_size_mb}MB")
# Use the actual project registry for persistence tests
project_registry = ProjectRegistry()


def test_persistent_mcp_instance() -> None:
    """Test that the persistent MCP instance works properly."""
    # Simply check that the instance exists
    assert mcp is not None
    assert mcp.name == "tree_sitter"


def test_persistent_project_registration() -> None:
    """Test that project registration persists across different functions.

    The registry is shared process-wide, so the project registered here is
    removed again in a ``finally`` block; otherwise it would outlive the
    temporary directory and leak into every test that runs afterwards.
    """
    project_name = "persistent_test"

    # Create a temporary directory
    with tempfile.TemporaryDirectory() as temp_dir:
        # Register a project directly using the registry
        project = project_registry.register_project(project_name, temp_dir)
        try:
            # Verify it was registered
            assert project.name == project_name
            all_projects = project_registry.list_projects()
            project_names = [p["name"] for p in all_projects]
            assert project_name in project_names

            # Get the project again to verify persistence
            project2 = project_registry.get_project(project_name)
            assert project2.name == project_name

            # List projects to verify it's included
            projects = project_registry.list_projects()
            assert any(p["name"] == project_name for p in projects)
        finally:
            # Do not leave an entry pointing at a deleted directory behind
            project_registry.remove_project(project_name)


def test_project_registry_singleton() -> None:
    """Test that project_registry is a singleton that persists."""
    # Check singleton behavior
    registry1 = ProjectRegistry()
    registry2 = ProjectRegistry()

    # Should be the same instance
    assert registry1 is registry2

    # Get projects from both registries
    projects1 = registry1.list_projects()
    projects2 = registry2.list_projects()

    # Should have the same number of projects
    assert len(projects1) == len(projects2)
--check 40 | mypy src/mcp_server_tree_sitter 41 | 42 | # Run all tests (regular + diagnostics) 43 | pytest tests 44 | pytest tests/test_diagnostics/ -v 45 | env: 46 | PYTHONPATH: ${{ github.workspace }}/src 47 | 48 | - name: Ensure diagnostic results directory exists 49 | if: always() 50 | run: mkdir -p diagnostic_results 51 | 52 | - name: Create placeholder if needed 53 | if: always() 54 | run: | 55 | if [ -z "$(ls -A diagnostic_results 2>/dev/null)" ]; then 56 | echo '{"info": "No diagnostic results generated"}' > diagnostic_results/placeholder.json 57 | fi 58 | 59 | - name: Archive diagnostic results 60 | if: always() 61 | uses: actions/upload-artifact@v4 62 | with: 63 | name: diagnostic-results-release 64 | path: diagnostic_results/ 65 | retention-days: 7 66 | if-no-files-found: warn 67 | 68 | - name: Install build dependencies 69 | run: | 70 | source .venv/bin/activate 71 | uv pip install build twine 72 | 73 | - name: Build package 74 | run: | 75 | source .venv/bin/activate 76 | python -m build 77 | 78 | - name: Test wheel 79 | run: | 80 | python -m pip install dist/*.whl 81 | mcp-server-tree-sitter --help 82 | 83 | - name: Publish to PyPI 84 | uses: pypa/gh-action-pypi-publish@release/v1 85 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "mcp-server-tree-sitter" 7 | version = "0.5.1" 8 | description = "MCP Server for Tree-sitter code analysis" 9 | readme = "README.md" 10 | requires-python = ">=3.10" 11 | license = {text = "MIT"} 12 | authors = [ 13 | {name = "Wrale LTD", email = "contact@wrale.com"} 14 | ] 15 | classifiers = [ 16 | "Development Status :: 3 - Alpha", 17 | "Intended Audience :: Developers", 18 | "License :: OSI Approved :: MIT License", 19 | "Programming Language :: Python :: 3", 20 | "Programming 
Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | ] 24 | dependencies = [ 25 | "mcp[cli]>=0.12.0", 26 | "tree-sitter>=0.20.0", 27 | "tree-sitter-language-pack>=0.6.1", 28 | "pyyaml>=6.0", 29 | "pydantic>=2.0.0", 30 | "types-pyyaml>=6.0.12.20241230", 31 | ] 32 | 33 | [project.optional-dependencies] 34 | dev = [ 35 | "pytest>=7.0.0", 36 | "pytest-cov>=4.0.0", 37 | "ruff>=0.0.262", 38 | "mypy>=1.2.0", 39 | ] 40 | # Language support (now included via tree-sitter-language-pack) 41 | languages = [ 42 | # No individual languages needed as tree-sitter-language-pack provides all 43 | ] 44 | 45 | [project.urls] 46 | "Homepage" = "https://github.com/wrale/mcp-server-tree-sitter" 47 | "Bug Tracker" = "https://github.com/wrale/mcp-server-tree-sitter/issues" 48 | 49 | [project.scripts] 50 | mcp-server-tree-sitter = "mcp_server_tree_sitter.server:main" 51 | 52 | [tool.hatch.build.targets.wheel] 53 | packages = ["src/mcp_server_tree_sitter"] 54 | 55 | [tool.pytest.ini_options] 56 | testpaths = ["tests"] 57 | python_files = "test_*.py" 58 | python_classes = "Test*" 59 | python_functions = "test_*" 60 | markers = [ 61 | "diagnostic: mark test as producing diagnostic information", 62 | ] 63 | 64 | [tool.mypy] 65 | python_version = "3.10" 66 | warn_return_any = true 67 | warn_unused_configs = true 68 | disallow_untyped_defs = true 69 | disallow_incomplete_defs = true 70 | 71 | [[tool.mypy.overrides]] 72 | module = "tree_sitter.*" 73 | ignore_missing_imports = true 74 | 75 | [[tool.mypy.overrides]] 76 | module = "tests.*" 77 | disallow_untyped_defs = false 78 | disallow_incomplete_defs = false 79 | check_untyped_defs = false 80 | warn_return_any = false 81 | warn_no_return = false 82 | 83 | [tool.ruff] 84 | line-length = 120 85 | target-version = "py310" 86 | 87 | [tool.ruff.lint] 88 | select = ["E", "F", "I", "W", "B"] 89 | -------------------------------------------------------------------------------- 
/docs/cli.md: -------------------------------------------------------------------------------- 1 | # MCP Tree-sitter Server CLI Guide 2 | 3 | This document explains the command-line interface (CLI) for the MCP Tree-sitter Server, including available options and usage patterns. 4 | 5 | ## Command-Line Arguments 6 | 7 | The MCP Tree-sitter Server provides a command-line interface with several options: 8 | 9 | ```bash 10 | mcp-server-tree-sitter [options] 11 | ``` 12 | 13 | ### Available Options 14 | 15 | | Option | Description | 16 | |--------|-------------| 17 | | `--help` | Show help message and exit | 18 | | `--version` | Show version information and exit | 19 | | `--config CONFIG` | Path to configuration file | 20 | | `--debug` | Enable debug logging | 21 | | `--disable-cache` | Disable parse tree caching | 22 | 23 | ### Examples 24 | 25 | Display help information: 26 | ```bash 27 | mcp-server-tree-sitter --help 28 | ``` 29 | 30 | Show version information: 31 | ```bash 32 | mcp-server-tree-sitter --version 33 | ``` 34 | 35 | Run with a custom configuration file: 36 | ```bash 37 | mcp-server-tree-sitter --config /path/to/config.yaml 38 | ``` 39 | 40 | Enable debug logging: 41 | ```bash 42 | mcp-server-tree-sitter --debug 43 | ``` 44 | 45 | Disable parse tree caching: 46 | ```bash 47 | mcp-server-tree-sitter --disable-cache 48 | ``` 49 | 50 | ## Running with MCP 51 | 52 | The server can also be run using the MCP command-line interface: 53 | 54 | ```bash 55 | # Run the server 56 | mcp run mcp_server_tree_sitter.server 57 | 58 | # Run with the MCP Inspector 59 | mcp dev mcp_server_tree_sitter.server 60 | ``` 61 | 62 | You can pass the same arguments to these commands: 63 | 64 | ```bash 65 | # Enable debug logging 66 | mcp run mcp_server_tree_sitter.server --debug 67 | 68 | # Use a custom configuration file with the inspector 69 | mcp dev mcp_server_tree_sitter.server --config /path/to/config.yaml 70 | ``` 71 | 72 | ## Using Makefile Targets 73 | 74 | For convenience, 
def test_config_default() -> None:
    """Test that default configuration is loaded."""
    # Create a default configuration
    config = ServerConfig()

    # Check defaults
    assert config.cache.enabled is True
    assert config.cache.max_size_mb == 100
    assert config.security.max_file_size_mb == 5
    assert ".git" in config.security.excluded_dirs


def test_project_registry() -> None:
    """Test project registry functionality.

    ``ProjectRegistry()`` hands back a shared instance, so other tests may
    already have registered projects. All checks are therefore made relative
    to the names present when the test starts, and the project is removed in
    a ``finally`` block even when an assertion fails.
    """
    import os

    registry = ProjectRegistry()
    names_before = {p["name"] for p in registry.list_projects()}

    # Create a temporary directory
    with tempfile.TemporaryDirectory() as temp_dir:
        # Register a project
        project = registry.register_project("test", temp_dir)
        removed = False
        try:
            # Check project details
            assert project.name == "test"
            # Use os.path.samefile to compare paths instead of string comparison
            # This handles platform-specific path normalization
            # (e.g., /tmp -> /private/tmp on macOS)
            assert os.path.samefile(str(project.root_path), temp_dir)

            # List projects: exactly one new entry, named "test"
            names_now = {p["name"] for p in registry.list_projects()}
            assert names_now == names_before | {"test"}

            # Get project
            project2 = registry.get_project("test")
            assert project2.name == "test"

            # Remove project: the registry is back to its starting state
            registry.remove_project("test")
            removed = True
            names_after = {p["name"] for p in registry.list_projects()}
            assert names_after == names_before
        finally:
            if not removed:
                registry.remove_project("test")


def test_language_registry() -> None:
    """Test language registry functionality."""
    registry = LanguageRegistry()

    # Test language detection
    assert registry.language_for_file("test.py") == "python"
    assert registry.language_for_file("script.js") == "javascript"
    assert registry.language_for_file("style.css") == "css"

    # Test available languages
    languages = registry.list_available_languages()
    assert isinstance(languages, list)

    # Test installable languages (should be empty now with language-pack)
    installable = registry.list_installable_languages()
    assert isinstance(installable, list)
    assert len(installable) == 0  # No languages need to be separately installed


if __name__ == "__main__":
    # Run tests
    test_config_default()
    test_project_registry()
    test_language_registry()
    print("All tests passed!")
def normalize_path(path: Union[str, Path], ensure_absolute: bool = False) -> Path:
    """
    Normalize a path for cross-platform compatibility.

    The path has ``~`` expanded and is then resolved (made absolute, with
    ``..`` components and symlinks collapsed).

    Args:
        path: Path string or object
        ensure_absolute: If True, raises ValueError for relative paths

    Returns:
        Normalized Path object

    Raises:
        ValueError: If ``ensure_absolute`` is True and ``path`` is relative
    """
    expanded = Path(path).expanduser()

    # resolve() always produces an absolute path, so the caller's input has to
    # be checked *before* resolving or the check can never fail.
    if ensure_absolute and not expanded.is_absolute():
        raise ValueError(f"Path must be absolute: {path}")

    return expanded.resolve()


def safe_relative_path(path: Union[str, Path], base: Union[str, Path]) -> Path:
    """
    Safely get a relative path that prevents directory traversal attacks.

    Both arguments are normalized first, so ``..`` components and symlinks are
    resolved before containment is tested.

    Args:
        path: Target path
        base: Base directory that should contain the path

    Returns:
        Relative path object

    Raises:
        ValueError: If path attempts to escape base directory
    """
    base_path = normalize_path(base)
    target_path = normalize_path(path)

    # Only the containment test lives inside the try block, so a ValueError
    # raised for any other reason is not relabelled with this message.
    try:
        relative = target_path.relative_to(base_path)
    except ValueError as e:
        raise ValueError(f"Path {path} is not within base directory {base}") from e

    # Defensive check: a path relative to a resolved base should never contain
    # ".." components, but refuse outright if one ever shows up.
    if ".." in relative.parts:
        raise ValueError(f"Path contains forbidden directory traversal: {path}")

    return relative


def get_project_root(path: Union[str, Path]) -> Path:
    """
    Attempt to determine project root from a file path by looking for common markers.

    Starting at ``path`` (or its parent directory when ``path`` is a file),
    each directory up to, but not including, the filesystem root is checked
    for one of the marker entries; the first directory containing one wins.

    Args:
        path: Path to start from (file or directory)

    Returns:
        Path to likely project root; the starting directory if no marker is found
    """
    start_dir = normalize_path(path)

    # If path is a file, start from its directory
    if start_dir.is_file():
        start_dir = start_dir.parent

    # Look for common project indicators
    markers = (
        ".git",
        "pyproject.toml",
        "setup.py",
        "package.json",
        "Cargo.toml",
        "CMakeLists.txt",
        ".svn",
        "Makefile",
    )

    # Start from path and go up directories until a marker is found
    current = start_dir
    while current != current.parent:  # Stop at filesystem root
        if any((current / marker).exists() for marker in markers):
            return current
        current = current.parent

    # If no marker found, return original directory
    return start_dir
def test_language_api_consistency() -> None:
    """Check that availability lookups agree with the language listing."""
    registry = LanguageRegistry()

    # Snapshot the listing once, then compare each per-language lookup to it
    listed_languages = registry.list_available_languages()

    for lang in ("python", "javascript", "typescript", "c", "cpp", "go", "rust"):
        is_available = registry.is_language_available(lang)
        is_listed = lang in listed_languages

        # Both methods should return the same result
        assert is_available == is_listed, f"Inconsistency for {lang}: available={is_available}, listed={is_listed}"


def test_server_language_tools() -> None:
    """Exercise the server-level language helpers."""
    # list_languages must report a non-empty list under "available"
    listing = list_languages()
    assert "available" in listing, "Missing 'available' key in list_languages result"
    available = listing["available"]
    assert isinstance(available, list), "'available' should be a list"
    assert len(available) > 0, "No languages available"

    # check_language_available must succeed for a few core languages
    for lang in ("python", "javascript", "typescript"):
        outcome = check_language_available(lang)
        assert outcome["status"] == "success", f"Language {lang} should be available"
        assert "message" in outcome, "Missing 'message' key in check_language_available result"


if __name__ == "__main__":
    test_list_available_languages()
    test_language_api_consistency()
    test_server_language_tools()
    print("All tests passed!")
-------------------------------------------------------------------------------- 1 | """Test the cursor-based AST implementation.""" 2 | 3 | import tempfile 4 | from pathlib import Path 5 | 6 | from mcp_server_tree_sitter.language.registry import LanguageRegistry 7 | from mcp_server_tree_sitter.models.ast_cursor import node_to_dict_cursor 8 | from mcp_server_tree_sitter.utils.file_io import read_binary_file 9 | from mcp_server_tree_sitter.utils.tree_sitter_helpers import create_parser, parse_source 10 | 11 | 12 | def test_cursor_based_ast() -> None: 13 | """Test that the cursor-based AST node_to_dict function works.""" 14 | # Create a temporary test file 15 | with tempfile.NamedTemporaryFile(suffix=".py", mode="w+") as f: 16 | f.write("def hello():\n print('Hello, world!')\n\nhello()\n") 17 | f.flush() 18 | 19 | file_path = Path(f.name) 20 | 21 | # Set up language registry 22 | registry = LanguageRegistry() 23 | language = registry.language_for_file(file_path.name) 24 | assert language is not None, "Could not detect language for test file" 25 | language_obj = registry.get_language(language) 26 | 27 | # Parse the file 28 | parser = create_parser(language_obj) 29 | source_bytes = read_binary_file(file_path) 30 | tree = parse_source(source_bytes, parser) 31 | 32 | # Get AST using cursor-based approach 33 | cursor_ast = node_to_dict_cursor(tree.root_node, source_bytes, max_depth=3) 34 | 35 | # Basic validation 36 | assert "id" in cursor_ast, "AST should include node ID" 37 | assert cursor_ast["type"] == "module", "Root node should be a module" 38 | assert "children" in cursor_ast, "AST should include children" 39 | assert len(cursor_ast["children"]) > 0, "AST should have at least one child" 40 | 41 | # Check function definition 42 | if cursor_ast["children"]: 43 | function_node = cursor_ast["children"][0] 44 | assert function_node["type"] == "function_definition", "Expected function definition" 45 | 46 | # Check if children are properly included 47 | assert "children" 
def main() -> int:
    """Run the MCP Tree-sitter server from the command line.

    Parses the CLI flags, handles ``--version``, applies the requested
    logging and configuration settings to ``global_context`` and finally
    hands control to ``mcp.run()``.

    Returns:
        Process exit code: ``0`` on success (including ``--version`` and a
        keyboard interrupt), ``1`` when loading the configuration or running
        the server raises.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="MCP Tree-sitter Server - Code analysis with tree-sitter")
    parser.add_argument("--config", help="Path to configuration file")
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
    parser.add_argument("--disable-cache", action="store_true", help="Disable parse tree caching")
    parser.add_argument("--version", action="store_true", help="Show version and exit")

    args = parser.parse_args()

    # Handle version display; nothing else is initialised on this path
    if args.version:
        import importlib.metadata

        try:
            version = importlib.metadata.version("mcp-server-tree-sitter")
            print(f"mcp-server-tree-sitter version {version}")
        except importlib.metadata.PackageNotFoundError:
            print("mcp-server-tree-sitter (version unknown - package not installed)")
        return 0

    # Set up logging level
    if args.debug:
        # Set the environment variable first so anything that reads it later
        # sees the same level as the already-created loggers
        os.environ["MCP_TS_LOG_LEVEL"] = "DEBUG"
        update_log_levels("DEBUG")
        logger.debug("Debug logging enabled")

    # Load configuration
    try:
        config = load_config(args.config)

        # Update global context with config
        if args.config:
            global_context.config_manager.load_from_file(args.config)
        else:
            # Update individual settings from config
            global_context.config_manager.update_value("cache.enabled", config.cache.enabled)
            global_context.config_manager.update_value("cache.max_size_mb", config.cache.max_size_mb)
            global_context.config_manager.update_value("security.max_file_size_mb", config.security.max_file_size_mb)
            global_context.config_manager.update_value("language.default_max_depth", config.language.default_max_depth)

        # The explicit CLI flag is applied last so it wins over both the file
        # and the loaded defaults (previously the flag was parsed but ignored)
        if args.disable_cache:
            global_context.config_manager.update_value("cache.enabled", False)
            logger.debug("Parse tree caching disabled via --disable-cache")

        logger.debug("Configuration loaded successfully")
    except Exception as e:
        # Top-level boundary: report and exit with a failure code
        logger.error(f"Error loading configuration: {e}")
        return 1

    # Run the server
    try:
        logger.info("Starting MCP Tree-sitter Server (with state persistence)")
        mcp.run()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the server, not an error
        logger.info("Server stopped by user")
    except Exception as e:
        logger.error(f"Error running server: {e}")
        return 1

    return 0
-------------------------------------------------------------------------------- 1 | # Contributing to MCP Tree-sitter Server 2 | 3 | Thank you for your interest in contributing to MCP Tree-sitter Server! This guide will help you understand our development process and coding standards. 4 | 5 | ## Development Setup 6 | 7 | 1. Clone the repository: 8 | ```bash 9 | git clone https://github.com/organization/mcp-server-tree-sitter.git 10 | cd mcp-server-tree-sitter 11 | ``` 12 | 13 | 2. Install with development dependencies: 14 | ```bash 15 | make install-dev 16 | ``` 17 | 18 | 3. Install language parsers (optional): 19 | ```bash 20 | make install-languages 21 | ``` 22 | 23 | ## Code Style and Standards 24 | 25 | We follow a strict set of coding standards to maintain consistency throughout the codebase: 26 | 27 | ### Python Style 28 | 29 | - We use [Black](https://black.readthedocs.io/) for code formatting with a line length of 88 characters 30 | - We use [Ruff](https://github.com/charliermarsh/ruff) for linting 31 | - We use [MyPy](https://mypy.readthedocs.io/) for static type checking 32 | 33 | ### Exception Handling 34 | 35 | - Use specific exception types rather than catching generic exceptions when possible 36 | - When re-raising exceptions, use the `from` clause to preserve the stack trace: 37 | ```python 38 | try: 39 | # Some code 40 | except SomeError as e: 41 | raise CustomError("Meaningful message") from e 42 | ``` 43 | 44 | ### Testing 45 | 46 | - Write tests for all new functionality 47 | - Run tests before submitting: 48 | ```bash 49 | make test 50 | ``` 51 | 52 | ### Documentation 53 | 54 | - Document all functions, classes, and modules using docstrings 55 | - Follow the Google Python Style Guide for docstrings 56 | - Include type hints for all function parameters and return values 57 | 58 | ## Development Workflow 59 | 60 | 1. Create a branch for your feature or bugfix: 61 | ```bash 62 | git checkout -b feature/your-feature-name 63 | ``` 64 | 65 | 2. 
Make your changes and ensure they pass linting and tests: 66 | ```bash 67 | make format 68 | make lint 69 | make test 70 | ``` 71 | 72 | 3. Commit your changes with a clear message describing the change 73 | 74 | 4. Submit a pull request to the main repository 75 | 76 | ## Running the Server 77 | 78 | You can run the server in different modes: 79 | 80 | - For development and testing: 81 | ```bash 82 | make mcp-dev 83 | ``` 84 | 85 | - For direct execution: 86 | ```bash 87 | make mcp-run 88 | ``` 89 | 90 | - To install in Claude Desktop: 91 | ```bash 92 | make mcp-install 93 | ``` 94 | 95 | ## Project Architecture 96 | 97 | The project follows a modular architecture: 98 | 99 | - `config.py` - Configuration management 100 | - `language/` - Tree-sitter language handling 101 | - `models/` - Data models for AST and projects 102 | - `cache/` - Caching mechanisms 103 | - `resources/` - MCP resources (files, AST) 104 | - `tools/` - MCP tools (search, analysis, etc.) 105 | - `utils/` - Utility functions 106 | - `prompts/` - MCP prompts 107 | - `server.py` - FastMCP server implementation 108 | 109 | ## Seeking Help 110 | 111 | If you have questions or need help, please open an issue or contact the maintainers. 
def test_help_flag_does_not_start_server():
    """Test that --help flag prints help and doesn't start the server."""
    # Use subprocess to test the actual command. The timeout matters: if a
    # regression makes --help start the server, the child would block on
    # stdio forever and hang the whole test run instead of failing.
    result = subprocess.run(
        [sys.executable, "-m", "mcp_server_tree_sitter", "--help"],
        capture_output=True,
        text=True,
        check=False,
        timeout=30,
    )

    # Check that it exited successfully
    assert result.returncode == 0

    # Check that the help text was printed
    assert "MCP Tree-sitter Server" in result.stdout
    assert "--help" in result.stdout
    assert "--config" in result.stdout

    # Server should not have started - no startup messages. The startup
    # message is emitted through logging, so look at both streams rather
    # than stdout alone.
    combined_output = result.stdout + result.stderr
    assert "Starting MCP Tree-sitter Server" not in combined_output
def test_entry_point_implementation():
    """Check, by inspecting its source text, that ``main`` relies on argparse."""
    import inspect

    from mcp_server_tree_sitter.server import main

    main_source = inspect.getsource(main)

    # argparse must be what parses the command line
    for required_snippet in ("argparse.ArgumentParser", "parse_args"):
        assert required_snippet in main_source

    # --help is added by argparse itself, so either an explicit mention or a
    # note saying it is handled "automatically" is accepted
    help_is_covered = any(marker in main_source for marker in ("--help", "automatically"))
    assert help_is_covered
    assert "--version" in main_source
| curl -LsSf https://astral.sh/uv/install.sh | sh 28 | echo "$HOME/.cargo/bin" >> $GITHUB_PATH 29 | 30 | - name: Install dependencies with uv 31 | if: matrix.install-method == 'uv' 32 | run: | 33 | uv venv 34 | source .venv/bin/activate 35 | uv pip install -e ".[dev]" 36 | which ruff 37 | which python 38 | 39 | - name: Install globally with uvx (system-wide) 40 | if: matrix.install-method == 'uvx' 41 | run: | 42 | python -m pip install -e ".[dev]" 43 | which ruff 44 | which python 45 | 46 | - name: Run checks and tests (uv) 47 | if: matrix.install-method == 'uv' 48 | run: | 49 | source .venv/bin/activate 50 | # Linting and formatting 51 | ruff check . 52 | ruff format . --check 53 | mypy src/mcp_server_tree_sitter 54 | # Run all tests including diagnostics 55 | pytest tests 56 | pytest tests/test_diagnostics/ -v 57 | env: 58 | PYTHONPATH: ${{ github.workspace }}/src 59 | 60 | - name: Run checks and tests (system) 61 | if: matrix.install-method == 'uvx' 62 | run: | 63 | # Linting and formatting 64 | ruff check . 65 | ruff format . 
--check 66 | mypy src/mcp_server_tree_sitter 67 | # Run all tests including diagnostics 68 | pytest tests 69 | pytest tests/test_diagnostics/ -v 70 | env: 71 | PYTHONPATH: ${{ github.workspace }}/src 72 | 73 | - name: Ensure diagnostic results directory exists 74 | if: always() 75 | run: mkdir -p diagnostic_results 76 | 77 | - name: Create placeholder if needed 78 | if: always() 79 | run: | 80 | if [ -z "$(ls -A diagnostic_results 2>/dev/null)" ]; then 81 | echo '{"info": "No diagnostic results generated"}' > diagnostic_results/placeholder.json 82 | fi 83 | 84 | - name: Archive diagnostic results 85 | if: always() 86 | uses: actions/upload-artifact@v4 87 | with: 88 | name: diagnostic-results-${{ matrix.install-method }} 89 | path: diagnostic_results/ 90 | retention-days: 7 91 | if-no-files-found: warn 92 | 93 | verify-uvx: 94 | runs-on: ubuntu-latest 95 | timeout-minutes: 5 96 | steps: 97 | - uses: actions/checkout@v4 98 | 99 | - name: Set up Python 3.12 100 | uses: actions/setup-python@v5 101 | with: 102 | python-version: "3.12" 103 | 104 | - name: Install build dependencies 105 | run: | 106 | python -m pip install build 107 | python -m pip install uv 108 | 109 | - name: Build package 110 | run: python -m build 111 | 112 | - name: Install and verify 113 | run: | 114 | python -m pip install dist/*.whl 115 | mcp-server-tree-sitter --help 116 | -------------------------------------------------------------------------------- /tests/test_makefile_targets.py: -------------------------------------------------------------------------------- 1 | """Tests for Makefile targets to ensure they execute correctly.""" 2 | 3 | import os 4 | import re 5 | import subprocess 6 | from pathlib import Path 7 | 8 | 9 | def test_makefile_target_syntax(): 10 | """Test that critical Makefile targets are correctly formed.""" 11 | # Get the Makefile content 12 | makefile_path = Path(__file__).parent.parent / "Makefile" 13 | with open(makefile_path, "r") as f: 14 | makefile_content = f.read() 
def test_makefile_target_execution():
    """Test that the ``mcp-run`` Makefile target resolves when invoked with --help.

    Only ``--help`` is exercised so the server is never actually started.
    The test is reported as *skipped* (not silently passed) when there is no
    Makefile, when running under CI, or when ``make`` cannot be executed.
    """
    # Local import: stdlib SkipTest is honoured by pytest and avoids adding a
    # new module-level dependency to this file.
    import unittest

    # Resolve the Makefile from the repository root (same location the syntax
    # test uses) instead of whatever the current working directory happens to be.
    repo_root = Path(__file__).parent.parent
    if not (repo_root / "Makefile").exists():
        raise unittest.SkipTest("Skipping test_makefile_target_execution: Makefile not found")

    # Skip this test in CI environment
    if os.environ.get("CI") == "true" or os.environ.get("GITHUB_ACTIONS") == "true":
        raise unittest.SkipTest("Skipping test_makefile_target_execution in CI environment")

    try:
        # Use the make target with --help appended to see if it resolves correctly
        result = subprocess.run(
            ["make", "mcp-run", "ARGS=--help"],
            capture_output=True,
            text=True,
            timeout=5,  # Don't let this run too long
            check=False,
            cwd=repo_root,
            env={**os.environ, "MAKEFLAGS": ""},  # Clear any inherited make flags
        )
    except (subprocess.SubprocessError, FileNotFoundError) as e:
        # Not being able to run make (missing binary, timeout) says nothing
        # about the Makefile itself, so report a skip rather than a failure.
        raise unittest.SkipTest(f"WARNING: Couldn't execute make command; skipping execution check: {e}") from e

    # The run shouldn't fail catastrophically
    assert "File not found" not in result.stderr, "mcp-run can't find the module"

    # Help text may land on either stream
    output = result.stdout + result.stderr
    has_usage = "usage:" in output.lower() or "mcp run" in output

    # Deliberately a warning only: the environment might not be set up to run
    # the mcp CLI through make.
    if not has_usage:
        print("WARNING: Couldn't verify mcp-run --help output; environment may not be properly configured")
def test_env_overrides_yaml(temp_yaml_file, monkeypatch):
    """MCP_TS_* environment variables must win over values loaded from YAML."""
    # The YAML fixture sets cache.max_size_mb=256 and security.max_file_size_mb=10;
    # both are overridden here (single-underscore variable naming).
    env_overrides = {
        "MCP_TS_CACHE_MAX_SIZE_MB": "1024",
        "MCP_TS_SECURITY_MAX_FILE_SIZE_MB": "15",
    }
    for variable, value in env_overrides.items():
        monkeypatch.setenv(variable, value)

    manager = ConfigurationManager()
    manager.load_from_file(temp_yaml_file)
    loaded = manager.get_config()

    # Overridden settings take the environment's value
    assert loaded.cache.max_size_mb == 1024, "Environment variable should override YAML values"
    assert loaded.security.max_file_size_mb == 15, "Environment variable should override YAML values"

    # Settings without an environment override keep what the YAML file said
    assert loaded.cache.ttl_seconds == 3600
    assert loaded.language.default_max_depth == 7
    assert loaded.language.auto_install is True
test_invalid_env_var_handling(monkeypatch): 82 | """Test that invalid environment variable values don't crash the system.""" 83 | # Set an invalid value for an integer field 84 | monkeypatch.setenv("MCP_TS_CACHE_MAX_SIZE_MB", "not_a_number") 85 | 86 | # This should not raise an exception 87 | mgr = ConfigurationManager() 88 | cfg = mgr.get_config() 89 | 90 | # The default value should be used 91 | assert cfg.cache.max_size_mb == 100, "Invalid values should fall back to defaults" 92 | -------------------------------------------------------------------------------- /docs/tree-sitter-type-safety.md: -------------------------------------------------------------------------------- 1 | # Tree-sitter Type Safety Guide 2 | 3 | This document explains our approach to type safety when interfacing with the tree-sitter library and why certain type-checking suppressions are necessary. 4 | 5 | ## Background 6 | 7 | The MCP Tree-sitter Server maintains type safety through Python's type hints and mypy verification. However, when interfacing with external libraries like tree-sitter, we encounter challenges: 8 | 9 | 1. Tree-sitter's Python bindings have inconsistent API signatures across versions 10 | 2. Tree-sitter objects don't always match our protocol definitions 11 | 3. The library may work at runtime but fail static type checking 12 | 13 | ## Type Suppression Strategy 14 | 15 | We use targeted `# type: ignore` comments to handle specific scenarios where mypy can't verify correctness, but our runtime code handles the variations properly. 
16 | 17 | ### Examples of Necessary Type Suppressions 18 | 19 | #### Parser Interface Variations 20 | 21 | Some versions of tree-sitter expose a `set_language()` method while others expose `language` as a settable attribute: 22 | 23 | ```python 24 | try: 25 | parser.set_language(safe_language) # type: ignore 26 | except AttributeError: 27 | if hasattr(parser, 'language'): 28 | # Use the language attribute if available 29 | parser.language = safe_language # type: ignore 30 | else: 31 | # Fallback to setting the attribute directly 32 | parser.language = safe_language # type: ignore 33 | ``` 34 | 35 | #### Node Handling Safety 36 | 37 | For cursor navigation and tree traversal, we need to handle potential `None` values: 38 | 39 | ```python 40 | def visit(node: Optional[Node], field_name: Optional[str], depth: int) -> bool: 41 | if node is None: 42 | return False 43 | # Continue with node operations... 44 | ``` 45 | 46 | ## Guidelines for Using Type Suppressions 47 | 48 | 1. **Be specific**: Always use `# type: ignore` on the exact line with the issue, not for entire blocks or files 49 | 2. **Add comments**: Explain why the suppression is necessary 50 | 3. **Try alternatives first**: Only use suppressions after trying to fix the actual type issue 51 | 4. **Include runtime checks**: Always pair suppressions with runtime checks (try/except, if hasattr, etc.) 52 | 53 | ## Our Pattern for Library Compatibility 54 | 55 | We follow a consistent pattern for tree-sitter API compatibility: 56 | 57 | 1. **Define Protocols**: Use Protocol classes to define expected interfaces 58 | 2. **Safe Type Casting**: Use wrapper functions like `ensure_node()` to safely cast objects 59 | 3. **Feature Detection**: Use `hasattr()` checks before accessing attributes 60 | 4. **Fallback Mechanisms**: Provide multiple ways to accomplish the same task 61 | 5.
def _config_snapshot(config: Any) -> Dict[str, Any]:
    """Return the three settings this diagnostic reports, keyed by dotted path."""
    return {
        "cache.max_size_mb": config.cache.max_size_mb,
        "security.max_file_size_mb": config.security.max_file_size_mb,
        "language.default_max_depth": config.language.default_max_depth,
    }


def diagnose_yaml_config(config_path: str) -> Dict[str, Any]:
    """Diagnose issues with YAML configuration loading.

    Walks through the loading pipeline one stage at a time (exists ->
    readable -> valid YAML -> mapping -> applied to the config) and stops at
    the first stage that fails, recording why in ``"error"``. The function
    reports problems in the returned dictionary instead of raising.

    NOTE(review): the last stage calls ``update_config_from_new`` on the
    object returned by ``global_context.get_config()``; presumably that is
    the live configuration, in which case running this diagnostic also
    applies the file's values - confirm against ``global_context``.

    Args:
        config_path: Path to YAML config file

    Returns:
        Dictionary with diagnostic information. Always contains
        ``file_path``, ``exists``, ``readable``, ``yaml_valid``,
        ``parsed_data``, ``config_before``, ``config_after`` and ``error``;
        ``file_content`` is added once the file has been read and
        ``new_config`` once a config object was built from the parsed data.
    """
    result: Dict[str, Any] = {
        "file_path": config_path,
        "exists": False,
        "readable": False,
        "yaml_valid": False,
        "parsed_data": None,
        "config_before": None,
        "config_after": None,
        "error": None,
    }

    # Check if file exists
    path_obj = Path(config_path)
    result["exists"] = path_obj.exists()

    if not result["exists"]:
        result["error"] = f"File does not exist: {config_path}"
        return result

    # Check if file is readable. Decode explicitly as UTF-8 so the outcome
    # does not depend on the platform's default locale encoding.
    try:
        content = path_obj.read_text(encoding="utf-8")
    except Exception as e:  # diagnostic boundary: report, never raise
        result["error"] = f"Error reading file: {str(e)}"
        return result
    result["readable"] = True
    result["file_content"] = content

    # Try to parse YAML
    try:
        config_data = yaml.safe_load(content)
    except Exception as e:  # diagnostic boundary: report, never raise
        result["error"] = f"Error parsing YAML: {str(e)}"
        return result
    result["yaml_valid"] = True
    result["parsed_data"] = config_data

    # An empty file (or one with only comments) parses to None
    if config_data is None:
        result["error"] = "YAML parser returned None (file empty or contains only comments)"
        return result

    # ServerConfig(**config_data) below requires a mapping
    if not isinstance(config_data, dict):
        result["error"] = f"YAML parser returned non-dict: {type(config_data)}"
        return result

    # Try building a config from the parsed data and applying it
    try:
        # Values before the update
        current_config = global_context.get_config()
        result["config_before"] = _config_snapshot(current_config)

        # Values the file would produce on its own
        new_config = ServerConfig(**config_data)
        result["new_config"] = _config_snapshot(new_config)

        # Apply, then record the values after the update
        update_config_from_new(current_config, new_config)
        result["config_after"] = _config_snapshot(current_config)

    except Exception as e:  # diagnostic boundary: report, never raise
        result["error"] = f"Error updating config: {str(e)}"
        return result

    return result
def register_project(path: str, name: Optional[str] = None, description: Optional[str] = None) -> Dict[str, Any]:
    """Register a project, scan its files and return its summary.

    Args:
        path: Project path handed to the project registry.
        name: Name to register under; ``path`` is used when this is falsy.
        description: Optional description passed through to the registry.

    Returns:
        The ``name``, ``root_path``, ``description``, ``languages`` and
        ``last_scan_time`` entries of the project's ``to_dict()`` output.

    Raises:
        ProjectError: If registering, scanning or summarising fails.
    """
    summary_keys = ("name", "root_path", "description", "languages", "last_scan_time")

    registry = get_project_registry()
    languages = get_language_registry()

    try:
        registered = registry.register_project(name or path, path, description)

        # Scanning needs the language registry to classify files
        registered.scan_files(languages)

        details = registered.to_dict()
        return {key: details[key] for key in summary_keys}
    except Exception as e:
        raise ProjectError(f"Failed to register project: {e}") from e
def clear_cache(project: Optional[str] = None, file_path: Optional[str] = None) -> Dict[str, str]:
    """Clear the parse tree cache, for one file or entirely.

    Args:
        project: Name of a registered project.
        file_path: File path resolved through that project.

    Returns:
        A ``{"status", "message"}`` dictionary describing what was cleared.

    Only when *both* arguments are truthy is a single file invalidated; in
    every other case (including when just one of them is given) the whole
    cache is invalidated.
    """
    cache = get_tree_cache()

    if not (project and file_path):
        # No complete (project, file) pair: drop everything
        cache.invalidate()
        return {"status": "success", "message": "Cache cleared"}

    # Resolve the file through its project, then drop just that entry
    owning_project = get_project_registry().get_project(project)
    resolved_path = owning_project.get_file_path(file_path)
    cache.invalidate(resolved_path)
    return {"status": "success", "message": f"Cache cleared for {file_path} in {project}"}
12 | 13 | Args: 14 | path: Path to the project directory 15 | name: Optional name for the project (defaults to directory name) 16 | description: Optional description 17 | 18 | Returns: 19 | Project information 20 | """ 21 | # Get dependencies from API 22 | project_registry = get_project_registry() 23 | language_registry = get_language_registry() 24 | 25 | try: 26 | # Register project 27 | project = project_registry.register_project(name or path, path, description) 28 | 29 | # Scan for languages 30 | project.scan_files(language_registry) 31 | 32 | project_dict = project.to_dict() 33 | # Add type annotations for clarity 34 | result: Dict[str, Any] = { 35 | "name": project_dict["name"], 36 | "root_path": project_dict["root_path"], 37 | "description": project_dict["description"], 38 | "languages": project_dict["languages"], 39 | "last_scan_time": project_dict["last_scan_time"], 40 | } 41 | return result 42 | except Exception as e: 43 | raise ProjectError(f"Failed to register project: {e}") from e 44 | 45 | 46 | def get_project(name: str) -> Dict[str, Any]: 47 | """ 48 | Get project information. 49 | 50 | Args: 51 | name: Project name 52 | 53 | Returns: 54 | Project information 55 | """ 56 | # Get dependency from API 57 | project_registry = get_project_registry() 58 | 59 | try: 60 | project = project_registry.get_project(name) 61 | project_dict = project.to_dict() 62 | # Add type annotations for clarity 63 | result: Dict[str, Any] = { 64 | "name": project_dict["name"], 65 | "root_path": project_dict["root_path"], 66 | "description": project_dict["description"], 67 | "languages": project_dict["languages"], 68 | "last_scan_time": project_dict["last_scan_time"], 69 | } 70 | return result 71 | except Exception as e: 72 | raise ProjectError(f"Failed to get project: {e}") from e 73 | 74 | 75 | def list_projects() -> List[Dict[str, Any]]: 76 | """ 77 | List all registered projects. 
78 | 79 | Returns: 80 | List of project information 81 | """ 82 | # Get dependency from API 83 | project_registry = get_project_registry() 84 | 85 | projects_list = project_registry.list_projects() 86 | # Explicitly create a typed list 87 | result: List[Dict[str, Any]] = [] 88 | for project in projects_list: 89 | result.append( 90 | { 91 | "name": project["name"], 92 | "root_path": project["root_path"], 93 | "description": project["description"], 94 | "languages": project["languages"], 95 | "last_scan_time": project["last_scan_time"], 96 | } 97 | ) 98 | return result 99 | 100 | 101 | def remove_project(name: str) -> Dict[str, str]: 102 | """ 103 | Remove a project. 104 | 105 | Args: 106 | name: Project name 107 | 108 | Returns: 109 | Success message 110 | """ 111 | # Get dependency from API 112 | project_registry = get_project_registry() 113 | 114 | try: 115 | project_registry.remove_project(name) 116 | return {"status": "success", "message": f"Project '{name}' removed"} 117 | except Exception as e: 118 | raise ProjectError(f"Failed to remove project: {e}") from e 119 | -------------------------------------------------------------------------------- /tests/test_diagnostics/test_ast.py: -------------------------------------------------------------------------------- 1 | """Example of using pytest with diagnostic plugin for testing.""" 2 | 3 | import tempfile 4 | from pathlib import Path 5 | 6 | import pytest 7 | 8 | from mcp_server_tree_sitter.api import get_project_registry 9 | from mcp_server_tree_sitter.language.registry import LanguageRegistry 10 | from tests.test_helpers import get_ast, register_project_tool 11 | 12 | # Load the diagnostic fixture 13 | pytest.importorskip("mcp_server_tree_sitter.testing") 14 | 15 | 16 | @pytest.fixture 17 | def test_project(): 18 | """Create a temporary test project with a sample file.""" 19 | # Set up a temporary directory 20 | with tempfile.TemporaryDirectory() as temp_dir: 21 | project_path = Path(temp_dir) 22 | 23 | # 
Create a test file 24 | test_file = project_path / "test.py" 25 | with open(test_file, "w") as f: 26 | f.write("def hello():\n print('Hello, world!')\n\nhello()\n") 27 | 28 | # Register project 29 | project_name = "diagnostic_test_project" 30 | register_project_tool(path=str(project_path), name=project_name) 31 | 32 | # Yield the project info 33 | yield {"name": project_name, "path": project_path, "file": "test.py"} 34 | 35 | # Clean up 36 | project_registry = get_project_registry() 37 | try: 38 | project_registry.remove_project(project_name) 39 | except Exception: 40 | pass 41 | 42 | 43 | @pytest.mark.diagnostic 44 | def test_ast_failure(test_project, diagnostic) -> None: 45 | """Test the get_ast functionality.""" 46 | # Add test details to diagnostic data 47 | diagnostic.add_detail("project", test_project["name"]) 48 | diagnostic.add_detail("file", test_project["file"]) 49 | 50 | try: 51 | # Try to get the AST 52 | ast_result = get_ast( 53 | project=test_project["name"], 54 | path=test_project["file"], 55 | max_depth=3, 56 | include_text=True, 57 | ) 58 | 59 | # Add the result to diagnostics 60 | diagnostic.add_detail("ast_result", str(ast_result)) 61 | 62 | # This assertion would fail if there's an issue with AST parsing 63 | assert "tree" in ast_result, "AST result should contain a tree" 64 | 65 | # Check that the tree doesn't contain an error 66 | if isinstance(ast_result["tree"], dict) and "error" in ast_result["tree"]: 67 | raise AssertionError(f"AST tree contains an error: {ast_result['tree']['error']}") 68 | 69 | except Exception as e: 70 | # Record the error in diagnostics 71 | diagnostic.add_error("AstParsingError", str(e)) 72 | 73 | # Create the artifact 74 | artifact = { 75 | "error_type": type(e).__name__, 76 | "error_message": str(e), 77 | "project": test_project["name"], 78 | "file": test_project["file"], 79 | } 80 | diagnostic.add_artifact("ast_failure", artifact) 81 | 82 | # Re-raise to fail the test 83 | raise 84 | 85 | 86 | 
@pytest.mark.diagnostic 87 | def test_language_detection(diagnostic) -> None: 88 | """Test language detection functionality.""" 89 | registry = LanguageRegistry() 90 | 91 | # Test a few common file extensions 92 | test_files = { 93 | "test.py": "python", 94 | "test.js": "javascript", 95 | "test.ts": "typescript", 96 | "test.unknown": None, 97 | } 98 | 99 | results = {} 100 | failures = [] 101 | 102 | for filename, expected in test_files.items(): 103 | detected = registry.language_for_file(filename) 104 | match = detected == expected 105 | 106 | results[filename] = {"detected": detected, "expected": expected, "match": match} 107 | 108 | if not match: 109 | failures.append(filename) 110 | 111 | # Add all results to diagnostic data 112 | diagnostic.add_detail("detection_results", results) 113 | if failures: 114 | diagnostic.add_detail("failed_files", failures) 115 | 116 | # Check results with proper assertions 117 | for filename, expected in test_files.items(): 118 | assert registry.language_for_file(filename) == expected, f"Language detection failed for {filename}" 119 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 
107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | 176 | # etc. 
177 | results/ 178 | diagnostic_results/ 179 | *.json 180 | -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # MCP Tree-sitter Server: TODO Board 2 | 3 | This Kanban board tracks tasks specifically focused on improving partially working commands and implementing missing features. 4 | 5 | ## In Progress 6 | 7 | ### High Priority 8 | --- 9 | 10 | #### Fix Similar Code Detection 11 | - **Description**: Improve the `find_similar_code` command to reliably return results 12 | - **Tasks**: 13 | - [ ] Debug why command completes but doesn't return results 14 | - [ ] Optimize similarity threshold and matching algorithm 15 | - [ ] Add more detailed logging for troubleshooting 16 | - [ ] Create comprehensive test cases with expected results 17 | - **Acceptance Criteria**: 18 | - Command reliably returns similar code snippets when they exist 19 | - Appropriate feedback when no similar code is found 20 | - Documentation updated with examples and recommended thresholds 21 | - **Complexity**: Medium 22 | - **Dependencies**: None 23 | 24 | #### Complete Tree Editing and Incremental Parsing 25 | - **Description**: Extend AST functionality to support tree manipulation 26 | - **Tasks**: 27 | - [ ] Implement tree editing operations (insert, delete, replace nodes) 28 | - [ ] Add incremental parsing to efficiently update trees after edits 29 | - [ ] Ensure node IDs remain consistent during tree manipulations 30 | - **Acceptance Criteria**: 31 | - Trees can be modified through API calls 32 | - Incremental parsing reduces parse time for small changes 33 | - Proper error handling for invalid modifications 34 | - **Complexity**: High 35 | - **Dependencies**: None 36 | 37 | ### Medium Priority 38 | --- 39 | 40 | #### Implement UTF-16 Support 41 | - **Description**: Add encoding detection and support for UTF-16 42 | - **Tasks**: 43 | - [ ] Implement encoding detection 
for input files 44 | - [ ] Add UTF-16 to UTF-8 conversion for parser compatibility 45 | - [ ] Handle position mapping between different encodings 46 | - **Acceptance Criteria**: 47 | - Correctly parse and handle UTF-16 encoded files 48 | - Maintain accurate position information in different encodings 49 | - Test suite includes UTF-16 encoded files 50 | - **Complexity**: Medium 51 | - **Dependencies**: None 52 | 53 | #### Add Read Callable Support 54 | - **Description**: Implement custom read strategies for efficient large file handling 55 | - **Tasks**: 56 | - [ ] Create streaming parser interface for large files 57 | - [ ] Implement memory-efficient parsing strategy 58 | - [ ] Add support for custom read handlers 59 | - **Acceptance Criteria**: 60 | - Successfully parse files larger than memory constraints 61 | - Performance tests show acceptable parsing speed 62 | - Documentation on how to use custom read strategies 63 | - **Complexity**: High 64 | - **Dependencies**: None 65 | 66 | ## Ready for Review 67 | 68 | ### High Priority 69 | --- 70 | 71 | #### Complete MCP Context Progress Reporting 72 | - **Description**: Implement progress reporting for long-running operations 73 | - **Tasks**: 74 | - [ ] Add progress tracking to all long-running operations 75 | - [ ] Implement progress callbacks in the MCP context 76 | - [ ] Update API to report progress percentage 77 | - **Acceptance Criteria**: 78 | - Long-running operations report progress 79 | - Progress is visible to the user 80 | - Cancellation is possible for operations in progress 81 | - **Complexity**: Low 82 | - **Dependencies**: None 83 | 84 | ## Done 85 | 86 | *No tasks completed yet* 87 | 88 | ## Backlog 89 | 90 | ### Low Priority 91 | --- 92 | 93 | #### Add Image Handling Support 94 | - **Description**: Implement support for returning images/visualizations from tools 95 | - **Tasks**: 96 | - [ ] Create image generation utilities for AST visualization 97 | - [ ] Add support for returning images in MCP 
responses 98 | - [ ] Implement SVG or PNG export of tree structures 99 | - **Acceptance Criteria**: 100 | - Tools can return visual representations of code structures 101 | - AST visualizations can be generated and returned 102 | - **Complexity**: Medium 103 | - **Dependencies**: None 104 | 105 | --- 106 | 107 | ## Task Metadata 108 | 109 | ### Priority Levels 110 | - **High**: Critical for core functionality, should be addressed immediately 111 | - **Medium**: Important for comprehensive feature set, address after high priority items 112 | - **Low**: Nice to have, address when resources permit 113 | 114 | ### Complexity Levels 115 | - **Low**: Estimated 1-2 days of work 116 | - **Medium**: Estimated 3-5 days of work 117 | - **High**: Estimated 1-2 weeks of work 118 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/tools/ast_operations.py: -------------------------------------------------------------------------------- 1 | """AST operation tools for MCP server.""" 2 | 3 | import logging 4 | from typing import Any, Dict, Optional 5 | 6 | from ..exceptions import FileAccessError, ParsingError 7 | from ..models.ast import node_to_dict 8 | from ..utils.file_io import read_binary_file 9 | from ..utils.security import validate_file_access 10 | from ..utils.tree_sitter_helpers import ( 11 | parse_source, 12 | ) 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | def get_file_ast( 18 | project: Any, 19 | path: str, 20 | language_registry: Any, 21 | tree_cache: Any, 22 | max_depth: Optional[int] = None, 23 | include_text: bool = True, 24 | ) -> Dict[str, Any]: 25 | """ 26 | Get the AST for a file. 
27 | 28 | Args: 29 | project: Project object 30 | path: File path (relative to project root) 31 | language_registry: Language registry 32 | tree_cache: Tree cache instance 33 | max_depth: Maximum depth to traverse the tree 34 | include_text: Whether to include node text 35 | 36 | Returns: 37 | AST as a nested dictionary 38 | 39 | Raises: 40 | FileAccessError: If file access fails 41 | ParsingError: If parsing fails 42 | """ 43 | abs_path = project.get_file_path(path) 44 | 45 | try: 46 | validate_file_access(abs_path, project.root_path) 47 | except Exception as e: 48 | raise FileAccessError(f"Access denied: {e}") from e 49 | 50 | language = language_registry.language_for_file(path) 51 | if not language: 52 | raise ParsingError(f"Could not detect language for {path}") 53 | 54 | tree, source_bytes = parse_file(abs_path, language, language_registry, tree_cache) 55 | 56 | return { 57 | "file": path, 58 | "language": language, 59 | "tree": node_to_dict( 60 | tree.root_node, 61 | source_bytes, 62 | include_children=True, 63 | include_text=include_text, 64 | max_depth=max_depth if max_depth is not None else 5, 65 | ), 66 | } 67 | 68 | 69 | def parse_file(file_path: Any, language: str, language_registry: Any, tree_cache: Any) -> tuple[Any, bytes]: 70 | """ 71 | Parse a file using tree-sitter. 72 | 73 | Args: 74 | file_path: Path to file 75 | language: Language identifier 76 | language_registry: Language registry 77 | tree_cache: Tree cache instance 78 | 79 | Returns: 80 | (Tree, source_bytes) tuple 81 | 82 | Raises: 83 | ParsingError: If parsing fails 84 | """ 85 | # Always check the cache first, even if caching is disabled 86 | # This ensures cache misses are tracked correctly in tests 87 | cached = tree_cache.get(file_path, language) 88 | if cached: 89 | tree, bytes_data = cached 90 | return tree, bytes_data 91 | 92 | try: 93 | # Parse the file using helper 94 | parser = language_registry.get_parser(language) 95 | # Use source directly with parser to avoid parser vs. 
language confusion 96 | source_bytes = read_binary_file(file_path) 97 | tree = parse_source(source_bytes, parser) 98 | result_tuple = (tree, source_bytes) 99 | 100 | # Cache the tree only if caching is enabled 101 | is_cache_enabled = False 102 | try: 103 | # Get cache enabled state from tree_cache 104 | is_cache_enabled = tree_cache._is_cache_enabled() 105 | except Exception: 106 | # Fallback to instance value if method not available 107 | is_cache_enabled = getattr(tree_cache, "enabled", False) 108 | 109 | # Store in cache only if enabled 110 | if is_cache_enabled: 111 | tree_cache.put(file_path, language, tree, source_bytes) 112 | 113 | return result_tuple 114 | except Exception as e: 115 | raise ParsingError(f"Error parsing {file_path}: {e}") from e 116 | 117 | 118 | def find_node_at_position(root_node: Any, row: int, column: int) -> Optional[Any]: 119 | """ 120 | Find the most specific node at a given position. 121 | 122 | Args: 123 | root_node: Root node to search from 124 | row: Row (line) number, 0-based 125 | column: Column number, 0-based 126 | 127 | Returns: 128 | Node at position or None if not found 129 | """ 130 | from ..models.ast import find_node_at_position as find_node 131 | 132 | return find_node(root_node, row, column) 133 | -------------------------------------------------------------------------------- /tests/test_config_manager.py: -------------------------------------------------------------------------------- 1 | """Tests for the new ConfigurationManager class.""" 2 | 3 | import os 4 | import tempfile 5 | 6 | import pytest 7 | import yaml 8 | 9 | # Import will fail initially until we implement the class 10 | 11 | 12 | @pytest.fixture 13 | def temp_yaml_file(): 14 | """Create a temporary YAML file with test configuration.""" 15 | with tempfile.NamedTemporaryFile(suffix=".yaml", mode="w+", delete=False) as temp_file: 16 | test_config = { 17 | "cache": {"enabled": True, "max_size_mb": 256, "ttl_seconds": 3600}, 18 | "security": 
{"max_file_size_mb": 10, "excluded_dirs": [".git", "node_modules", "__pycache__", ".cache"]}, 19 | "language": {"auto_install": True, "default_max_depth": 7}, 20 | } 21 | yaml.dump(test_config, temp_file) 22 | temp_file.flush() 23 | temp_file_path = temp_file.name 24 | 25 | yield temp_file_path 26 | 27 | # Clean up 28 | os.unlink(temp_file_path) 29 | 30 | 31 | def test_config_manager_initialization(): 32 | """Test that ConfigurationManager initializes with default config.""" 33 | # This test will fail until we implement ConfigurationManager 34 | from mcp_server_tree_sitter.config import ConfigurationManager 35 | 36 | manager = ConfigurationManager() 37 | config = manager.get_config() 38 | 39 | # Check default values 40 | assert config.cache.max_size_mb == 100 41 | assert config.security.max_file_size_mb == 5 42 | assert config.language.default_max_depth == 5 43 | 44 | 45 | def test_config_manager_load_from_file(temp_yaml_file): 46 | """Test loading configuration from a file.""" 47 | # This test will fail until we implement ConfigurationManager 48 | from mcp_server_tree_sitter.config import ConfigurationManager 49 | 50 | manager = ConfigurationManager() 51 | manager.load_from_file(temp_yaml_file) 52 | config = manager.get_config() 53 | 54 | # Check loaded values 55 | assert config.cache.max_size_mb == 256 56 | assert config.security.max_file_size_mb == 10 57 | assert config.language.default_max_depth == 7 58 | 59 | 60 | def test_config_manager_update_values(): 61 | """Test updating individual configuration values.""" 62 | # This test will fail until we implement ConfigurationManager 63 | from mcp_server_tree_sitter.config import ConfigurationManager 64 | 65 | manager = ConfigurationManager() 66 | 67 | # Update values 68 | manager.update_value("cache.max_size_mb", 512) 69 | manager.update_value("security.max_file_size_mb", 20) 70 | 71 | # Check updated values 72 | config = manager.get_config() 73 | assert config.cache.max_size_mb == 512 74 | assert 
config.security.max_file_size_mb == 20 75 | 76 | 77 | def test_config_manager_to_dict(): 78 | """Test converting configuration to dictionary.""" 79 | # This test will fail until we implement ConfigurationManager 80 | from mcp_server_tree_sitter.config import ConfigurationManager 81 | 82 | manager = ConfigurationManager() 83 | config_dict = manager.to_dict() 84 | 85 | # Check dictionary structure 86 | assert "cache" in config_dict 87 | assert "security" in config_dict 88 | assert "language" in config_dict 89 | assert config_dict["cache"]["max_size_mb"] == 100 90 | 91 | 92 | def test_env_overrides_defaults(monkeypatch): 93 | """Environment variables should override hard-coded defaults.""" 94 | monkeypatch.setenv("MCP_TS_CACHE_MAX_SIZE_MB", "512") 95 | 96 | from mcp_server_tree_sitter.config import ConfigurationManager 97 | 98 | mgr = ConfigurationManager() 99 | cfg = mgr.get_config() 100 | 101 | assert cfg.cache.max_size_mb == 512, "Environment variable should override default value" 102 | # ensure other defaults stay intact 103 | assert cfg.security.max_file_size_mb == 5 104 | assert cfg.language.default_max_depth == 5 105 | 106 | 107 | def test_env_overrides_yaml(temp_yaml_file, monkeypatch): 108 | """Environment variables should take precedence over YAML values.""" 109 | # YAML sets 256; env var must win with 1024 110 | monkeypatch.setenv("MCP_TS_CACHE_MAX_SIZE_MB", "1024") 111 | monkeypatch.setenv("MCP_TS_SECURITY_MAX_FILE_SIZE_MB", "15") 112 | 113 | from mcp_server_tree_sitter.config import ConfigurationManager 114 | 115 | mgr = ConfigurationManager() 116 | mgr.load_from_file(temp_yaml_file) 117 | cfg = mgr.get_config() 118 | 119 | assert cfg.cache.max_size_mb == 1024, "Environment variable should override YAML value" 120 | assert cfg.security.max_file_size_mb == 15, "Environment variable should override YAML value" 121 | -------------------------------------------------------------------------------- /src/mcp_server_tree_sitter/utils/file_io.py: 
-------------------------------------------------------------------------------- 1 | """Utilities for safe file operations. 2 | 3 | This module provides safe file I/O operations with proper encoding handling 4 | and consistent interfaces for both text and binary operations. 5 | """ 6 | 7 | from pathlib import Path 8 | from typing import List, Optional, Tuple, Union 9 | 10 | 11 | def read_text_file(path: Union[str, Path]) -> List[str]: 12 | """ 13 | Safely read a text file with proper encoding handling. 14 | 15 | Args: 16 | path: Path to the file 17 | 18 | Returns: 19 | List of lines from the file 20 | """ 21 | with open(str(path), "r", encoding="utf-8", errors="replace") as f: 22 | return f.readlines() 23 | 24 | 25 | def read_binary_file(path: Union[str, Path]) -> bytes: 26 | """ 27 | Safely read a binary file. 28 | 29 | Args: 30 | path: Path to the file 31 | 32 | Returns: 33 | File contents as bytes 34 | """ 35 | with open(str(path), "rb") as f: 36 | return f.read() 37 | 38 | 39 | def get_file_content_and_lines(path: Union[str, Path]) -> Tuple[bytes, List[str]]: 40 | """ 41 | Get both binary content and text lines from a file. 42 | 43 | Args: 44 | path: Path to the file 45 | 46 | Returns: 47 | Tuple of (binary_content, text_lines) 48 | """ 49 | binary_content = read_binary_file(path) 50 | text_lines = read_text_file(path) 51 | return binary_content, text_lines 52 | 53 | 54 | def is_line_comment(line: str, comment_prefix: str) -> bool: 55 | """ 56 | Check if a line is a comment. 57 | 58 | Args: 59 | line: The line to check 60 | comment_prefix: Comment prefix character(s) 61 | 62 | Returns: 63 | True if the line is a comment 64 | """ 65 | return line.strip().startswith(comment_prefix) 66 | 67 | 68 | def count_comment_lines(lines: List[str], comment_prefix: str) -> int: 69 | """ 70 | Count comment lines in a file. 
71 | 72 | Args: 73 | lines: List of lines to check 74 | comment_prefix: Comment prefix character(s) 75 | 76 | Returns: 77 | Number of comment lines 78 | """ 79 | return sum(1 for line in lines if is_line_comment(line, comment_prefix)) 80 | 81 | 82 | def get_comment_prefix(language: str) -> Optional[str]: 83 | """ 84 | Get the comment prefix for a language. 85 | 86 | Args: 87 | language: Language identifier 88 | 89 | Returns: 90 | Comment prefix or None if unknown 91 | """ 92 | # Language-specific comment detection 93 | comment_starters = { 94 | "python": "#", 95 | "javascript": "//", 96 | "typescript": "//", 97 | "java": "//", 98 | "c": "//", 99 | "cpp": "//", 100 | "go": "//", 101 | "ruby": "#", 102 | "rust": "//", 103 | "php": "//", 104 | "swift": "//", 105 | "kotlin": "//", 106 | "scala": "//", 107 | "bash": "#", 108 | "shell": "#", 109 | "yaml": "#", 110 | "html": "