├── .gitignore ├── LICENSE ├── Project_Indexer.py ├── README.md ├── cSharpIndexer ├── cSharpIndexer.py └── indexer_dependencies.csproj ├── memory-bank ├── productContext.md └── systemPatterns.md ├── parser ├── __init__.py ├── csharp_parser.py ├── parser.py ├── python_parser.py └── typescript_parser.py ├── requirements.txt └── test ├── benchmark_parser.py ├── resources ├── test.cs ├── test.ts └── test.tsx └── test_parser.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Project Specific 7 | ProjectIndex.json 8 | 9 | # IDEs and Editors 10 | .vscode/ 11 | .idea/ 12 | 13 | # Virtual Environment 14 | venv/ 15 | 16 | # roo code 17 | memory-bank/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Dolfie-01 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Project_Indexer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import argparse 4 | 5 | from parser import extract_types_and_members_from_file_for_typescript 6 | from parser.parser import extract_types_and_members_from_file_for_csharp, extract_types_and_members_from_file_for_python 7 | 8 | def index_project_structure(root_dir: str, extract_imports: bool = False): 9 | """ 10 | Walks through the directory tree starting at root_dir. 11 | Extracts type definitions and members from each C# or Python file and creates a structured index. 
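
    Args:
        root_dir: Root directory to walk recursively.
        extract_imports: When True, also record import statements for Python files.

    Returns:
        dict: Maps each file path (relative to root_dir) to its extracted types and
        members; files with nothing extracted are omitted.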
12 | """ 13 | project_index = {} 14 | print(f"Indexing project structure starting at: {root_dir}") 15 | # Walk through the directory tree 16 | for subdir, _, files in os.walk(root_dir): 17 | for file in files: 18 | # Process only C# and Python files 19 | if not file.endswith('.cs') and not file.endswith('.py') and not file.endswith('.tsx') and not file.endswith('.ts'): 20 | print(f"Skipping non-C# or non-Python file or non-Typescript: {file}") 21 | continue 22 | # Construct the full file path and relative path 23 | file_path = os.path.join(subdir, file) 24 | relative_path = os.path.relpath(file_path, root_dir) 25 | if file.endswith('.cs'): 26 | # Extract C# types and members 27 | details = extract_types_and_members_from_file_for_csharp(file_path) 28 | elif file.endswith('.py'): 29 | # Extract Python types and members 30 | details = extract_types_and_members_from_file_for_python(file_path, extract_imports) 31 | elif file.endswith('.tsx') or file.endswith('.ts'): 32 | # Extract TypeScript types and members 33 | details = extract_types_and_members_from_file_for_typescript(file_path) 34 | # Include in the index only if any type or member was found 35 | project_index_details = details.__to_dict__() 36 | if any(project_index_details.values()): 37 | project_index[relative_path] = project_index_details 38 | return project_index 39 | 40 | if __name__ == "__main__": 41 | # Specify pwd as default root directory and argument --path if provided 42 | root_directory = os.getcwd() # Default to current working directory 43 | # Check if a path argument is provided 44 | parser = argparse.ArgumentParser(description='Index project structure for C# and Python files.') 45 | parser.add_argument('--path', type=str, help='Path to the project directory to index') 46 | parser.add_argument('--imports', action='store_true', help='Extract imports from Python files', default=False) 47 | args = parser.parse_args() 48 | if args.path: 49 | root_directory = args.path 50 | # Check if the provided path exists 51 | if not os.path.exists(root_directory): 52 | print(f"Provided path does not exist: {root_directory}") 53 | exit(1) 54 | # Check if the provided path is a directory 55 | if not os.path.isdir(root_directory): 56 | print(f"Provided path is not a directory: {root_directory}") 57 | exit(1) 58 | # Index the project structure starting at the specified root directory 59 | index = index_project_structure(root_directory, args.imports) 60 | # Export file renamed to ProjectIndex.json 61 | export_filename = f"{root_directory}/ProjectIndex.json" 62 | with open(export_filename, 'w', encoding='utf-8') as index_file: 63 | json.dump(index, index_file, indent=4) 64 | print(f"Project structure indexed successfully and exported to {export_filename}.") -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Project Indexer 2 | 3 | ## Overview 4 | 5 | `Project Indexer` is a simple script designed to index the locations of classes, files, and other components within a project. This indexing allows **Large Language Models (LLMs)** to quickly understand the project's structure, enabling **D.R.Y. (Don't Repeat Yourself) coding** by preventing redundant object creation and avoiding full project scans. 6 | 7 | By providing an organized index, `Project Indexer` enhances the LLM's understanding from the outset. 
When combined with a **well-structured design document** containing patterns and examples, it facilitates **efficient onboarding** for new tasks or ongoing development in an active codebase. 8 | 9 | The tool currently supports both C# and Python files, with specialized regex patterns for each language's syntax. The index includes: 10 | 11 | - For C#: classes, structs, interfaces, enums, and methods 12 | - For Python: 13 | - Classes (including decorated classes) 14 | - Functions (including decorated functions) 15 | - Methods (instance, class and static methods) 16 | - Imports (absolute and relative) 17 | - Docstrings (as metadata) 18 | 19 | 20 | ## Why Use ProjectIndexer? 21 | 22 | - **Reduces unnecessary LLM scanning**: Saves token usage compared to constant back-and-forth querying. 23 | - **Prevents hallucinations**: Helps LLMs recognize what exists in the codebase. 24 | - **Speeds up development**: Ideal for large projects with many files and classes. 25 | - **Compatible with multiple LLMs**, including: 26 | - RooCode - the Awesome VSCode AI Developement Extension. 27 | - Gemini (All) 28 | - Sonnet (All) 29 | - o3-mini and o3-mini-high 30 | - Deepseek V3.1 & Deepseek R1 31 | - GPT models 32 | - Grok 33 | 34 | ## Features 35 | 36 | - **Creates** a **.json file** listing class names, methods and locations 37 | - **Multi-language support**: Works with both C# and Python codebases 38 | - **Tree-sitter parsing**: More accurate than regex for complex syntax cases 39 | - **Lightweight**: Only indexes names and locations, not full class properties or implementations 40 | - **Supports integration with RooCode**: In some cases, you can configure `Code/Architect` mode to frequently refer to `ProjectIndex.json`. You may also set it to **run periodically** upon reaching milestones or creating new tasks 41 | 42 | > **Note:** This tool was built for personal use due to managing a **VS Solution with 5 projects and over 600 classes/methods**. 43 | 44 | ## Parser Architecture 45 | 46 | The project uses a modular parser system with Tree-sitter for accurate syntax parsing: 47 | 48 | - `parser/__init__.py`: Main parser interface and Tree-sitter grammar initialization 49 | - `parser/csharp_parser.py`: Handles C# specific parsing using Tree-sitter 50 | - `parser/python_parser.py`: Handles Python specific parsing using Tree-sitter 51 | 52 | Each parser implements: 53 | 54 | - File extension detection 55 | - Tree-sitter based parsing with language grammars 56 | - Common output format (JSON) 57 | 58 | ### Tree-sitter Integration 59 | 60 | The project now uses [Tree-sitter](https://tree-sitter.github.io/tree-sitter/) for more accurate parsing: 61 | 62 | - **Pre-loaded grammars** for Python and C# at startup 63 | - **Faster parsing** by avoiding regex pattern matching 64 | - **More reliable** extraction of code structures 65 | - **Better handling** of complex syntax cases 66 | 67 | Required dependencies (automatically installed via requirements.txt): 68 | 69 | - tree-sitter==0.24.0 70 | - tree-sitter-python==0.23.6 71 | - tree-sitter-c-sharp==0.23.1 72 | 73 | ## Installation & Usage 74 | 75 | 1. Drop `Project_Indexer.py` into the root of your project 76 | 77 | 2. Configure the root directory: 78 | 79 | - Open `Project_Indexer.py` 80 | - On line 54, set `root_directory` to your project's root path 81 | - Example for Windows: `"C:\\MyProject"` 82 | - Example for Mac/Linux: `"/Users/name/MyProject"` 83 | 84 | 3. 
Run the script via command line: 85 | 86 | ```sh 87 | # Using --path argument to specify project directory 88 | python Project_Indexer.py --path /path/to/your/project 89 | # Using --imports to specify extracting imported libraries/methods in each file (only supported for python right now) 90 | python Project_Indexer.py --path /path/to/your/project --imports 91 | # Without arguments (uses hardcoded path in script) 92 | python Project_Indexer.py 93 | ``` 94 | 95 | 3. The script will generate `ProjectIndex.json`. 96 | 97 | 4. Direct your LLM to read `ProjectIndex.json` for efficient project awareness. 98 | 99 | ## C# Project Indexer 100 | 101 | 1. This is a highly specialized indexer for **C# files only**. It also reads and indexes `.razor` files for Blazor projects. 102 | 2. It uses the **PythonNET** library to interface with **Roslyn analyzers** to "walk the tree," resulting in a highly compressed `ProjectIndex.json` file. 103 | 3. Place the script in the **root directory** of your project. 104 | 4. Run the following command to generate the index: 105 | ```python cSharpIndexer.py ``` 106 | 5. If Roslyn dependencies are missing, the script will attempt to install them automatically. Manual installation may be required in some cases. 107 | 6. Pull requests and contributions are welcome — this tool is highly experimental. 108 | 109 | 110 | 111 | I hope this is useful. 112 | 113 | Happy coding! 114 | 115 | ## Contributing 116 | 117 | Feel free to fork the repository and submit pull requests for improvements! 118 | 119 | ## License 120 | 121 | This project is licensed under the [MIT License](LICENSE). 122 | 123 | --- 124 | -------------------------------------------------------------------------------- /cSharpIndexer/cSharpIndexer.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | import time 4 | import argparse 5 | import os 6 | import sys 7 | 8 | # --- Set Environment Variable for Pythonnet Runtime --- 9 | print("Setting PYTHONNET_RUNTIME=coreclr environment variable...") 10 | os.environ['PYTHONNET_RUNTIME'] = 'coreclr' 11 | 12 | # --- Pythonnet Setup --- 13 | try: 14 | import clr 15 | print("pythonnet library imported.") 16 | runtime_env = os.environ.get('PYTHONNET_RUNTIME') 17 | if runtime_env and runtime_env.lower() == 'coreclr': 18 | print("Confirmed PYTHONNET_RUNTIME=coreclr is set.") 19 | else: 20 | print("Warning: PYTHONNET_RUNTIME might not be 'coreclr'. 
Issues may occur.") 21 | 22 | except ImportError: 23 | print("Error: pythonnet library not found.") 24 | print("Please install it using: pip install pythonnet") 25 | sys.exit(1) 26 | except Exception as e: 27 | print(f"Error during pythonnet import or runtime check: {e}") 28 | sys.exit(1) 29 | 30 | 31 | # --- Load Roslyn Assemblies --- 32 | roslyn_loaded = False 33 | try: 34 | roslyn_version = "4.8.0" 35 | target_framework = "netstandard2.0" 36 | nuget_base_path = Path.home() / ".nuget" / "packages" 37 | 38 | # Construct correct paths for the assemblies 39 | analysis_common_pkg_path = nuget_base_path / "microsoft.codeanalysis.common" / roslyn_version / "lib" / target_framework 40 | analysis_dll = analysis_common_pkg_path / "Microsoft.CodeAnalysis.dll" 41 | 42 | csharp_pkg_path = nuget_base_path / "microsoft.codeanalysis.csharp" / roslyn_version / "lib" / target_framework 43 | csharp_dll = csharp_pkg_path / "Microsoft.CodeAnalysis.CSharp.dll" 44 | 45 | # Debug prints for paths 46 | print(f"Attempting to load Roslyn v{roslyn_version} from NuGet cache:") 47 | print(f" - Analysis DLL: {analysis_dll}") 48 | print(f" - CSharp DLL: {csharp_dll}") 49 | 50 | # Verify paths exist 51 | if not analysis_dll.exists(): 52 | raise FileNotFoundError(f"Microsoft.CodeAnalysis.dll not found at expected NuGet path: {analysis_dll}") 53 | if not csharp_dll.exists(): 54 | raise FileNotFoundError(f"Microsoft.CodeAnalysis.CSharp.dll not found at expected NuGet path: {csharp_dll}") 55 | 56 | # Load the assemblies 57 | clr.AddReference(str(analysis_dll)) 58 | clr.AddReference(str(csharp_dll)) 59 | roslyn_loaded = True 60 | print("Roslyn assemblies loaded successfully from NuGet cache.") 61 | 62 | # Import namespaces after loading assemblies 63 | import Microsoft.CodeAnalysis as MSAnalysis 64 | import Microsoft.CodeAnalysis.CSharp as MSCSharp 65 | import Microsoft.CodeAnalysis.CSharp.Syntax as MSSyntax 66 | print("Roslyn namespaces imported successfully.") 67 | 68 | except Exception as e: 69 | print(f"Error: Failed to load or import Roslyn assemblies. {e}") 70 | sys.exit(1) 71 | 72 | 73 | def get_parent_context(node): 74 | """Finds the parent context (namespace, class, etc.) 
for a given node.""" 75 | parent = node.Parent 76 | while parent is not None: 77 | if isinstance(parent, MSSyntax.BaseTypeDeclarationSyntax): # Class, Struct, Interface, Enum, Record 78 | return parent.Identifier.ValueText 79 | elif isinstance(parent, MSSyntax.NamespaceDeclarationSyntax): 80 | return parent.Name.ToString() 81 | parent = parent.Parent 82 | return None 83 | 84 | 85 | def index_csharp_and_razor_files(root_dir, output_file="ProjectIndex.json"): 86 | """Indexes C# and Razor files in the given directory.""" 87 | start_time = time.time() 88 | all_definitions = [] 89 | root_path = Path(root_dir).resolve() 90 | 91 | print(f"Starting indexing in: {root_path}") 92 | 93 | # Find all .cs and .razor files 94 | all_files = list(root_path.rglob("*.cs")) + list(root_path.rglob("*.razor")) 95 | print(f"Found {len(all_files)} .cs and .razor files.") 96 | 97 | excluded_dir_names = {"obj", "bin"} 98 | files_to_process = [f for f in all_files if not any(part.lower() in excluded_dir_names for part in f.parts)] 99 | 100 | for file_path in files_to_process: 101 | relative_path_str = str(file_path.relative_to(root_path)).replace("\\", "/") 102 | file_extension = file_path.suffix.lower() 103 | 104 | try: 105 | content = file_path.read_text(encoding="utf-8") 106 | 107 | if file_extension == ".cs": 108 | parse_options = MSCSharp.CSharpParseOptions(languageVersion=MSCSharp.LanguageVersion.Latest) 109 | syntax_tree = MSCSharp.CSharpSyntaxTree.ParseText(content, options=parse_options, path=str(file_path)) 110 | root = syntax_tree.GetRoot() 111 | 112 | for node in root.DescendantNodes(): 113 | # Only index high-level definitions 114 | if isinstance(node, MSSyntax.ClassDeclarationSyntax): 115 | name = node.Identifier.ValueText 116 | parent = get_parent_context(node) 117 | # Add class definition as [name, path, parent] 118 | all_definitions.append([name, relative_path_str, parent]) 119 | elif isinstance(node, MSSyntax.NamespaceDeclarationSyntax): 120 | name = node.Name.ToString() 121 | # Add namespace definition as [name, path, None] 122 | all_definitions.append([name, relative_path_str, None]) 123 | 124 | elif file_extension == ".razor": 125 | # Add Razor file path only as a string 126 | all_definitions.append(relative_path_str) 127 | 128 | except Exception as e: 129 | print(f"Error processing file {relative_path_str}: {e}") 130 | 131 | # Output results 132 | try: 133 | output_path = root_path / output_file 134 | with open(output_path, "w", encoding="utf-8") as f: 135 | json.dump(all_definitions, f, separators=(',', ':')) # Compact JSON formatting 136 | print(f"Indexing completed successfully. Output written to {output_path}") 137 | except Exception as e: 138 | print(f"Error writing output file: {e}") 139 | 140 | print(f"Indexing completed in {time.time() - start_time:.2f} seconds.") 141 | 142 | 143 | if __name__ == "__main__": 144 | parser = argparse.ArgumentParser(description="Index C# and Razor project definitions.") 145 | parser.add_argument( 146 | "root_dir", 147 | nargs="?", 148 | default=".", 149 | help="Root directory of the C# project/solution (default: current directory)." 150 | ) 151 | parser.add_argument( 152 | "-o", "--output", 153 | default="ProjectIndex.json", 154 | help="Output JSON file name (default: ProjectIndex.json)." 
155 | ) 156 | args = parser.parse_args() 157 | 158 | target_dir = Path(args.root_dir) 159 | if not target_dir.is_dir(): 160 | print(f"Error: Provided root directory '{args.root_dir}' not found or is not a directory.") 161 | sys.exit(1) 162 | 163 | index_csharp_and_razor_files(target_dir, args.output) -------------------------------------------------------------------------------- /cSharpIndexer/indexer_dependencies.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net8.0 5 | 6 | true 7 | false 8 | false 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /memory-bank/productContext.md: -------------------------------------------------------------------------------- 1 | # Product Context 2 | 3 | ## Project Overview 4 | 5 | - Name: ProjectIndexer 6 | - Purpose: Develop a CLI tool to index and organize project files across multiple programming languages (Python, C#, Java, JavaScript) for enhanced searchability and code understanding. 7 | - Key Features: Multi-language file indexing (Python, C#, Java, JS), robust search, code metadata extraction, CLI interface. 8 | 9 | ## Technical Stack 10 | 11 | - Target Languages: Python, C#, Java, JavaScript (Initial focus: Python) 12 | - Core Language: Python (for CLI and orchestration) 13 | - Dependencies: TBD 14 | 15 | ## Architecture 16 | 17 | - Components: CLI interface, Core orchestrator, Language-specific parsers/indexers (Strategy Pattern likely needed), Unified search engine, Data storage (for index) 18 | - Data Flow: TBD 19 | -------------------------------------------------------------------------------- /memory-bank/systemPatterns.md: -------------------------------------------------------------------------------- 1 | # System Patterns 2 | 3 | ## ProjectIndexer Architecture Patterns 4 | 5 | ### File Processing Pipeline 6 | 7 | - **Pattern**: Sequential processing pipeline 8 | - **Components**: Scanner → Indexer → Search Engine 9 | - **Characteristics**: 10 | - Each component has single responsibility 11 | - Data flows linearly between components 12 | - Easy to add new processing steps 13 | 14 | ### Indexing Strategy 15 | 16 | - **Pattern**: Inverted index 17 | - **Characteristics**: 18 | - Efficient for text search 19 | - Scales well with document count 20 | - Supports partial matches 21 | 22 | ### Error Handling 23 | 24 | - **Pattern**: Fail-fast with recovery 25 | - **Characteristics**: 26 | - Validate inputs early 27 | - Log detailed errors 28 | - Continue processing other files if one fails 29 | 30 | ### Language Handling Strategy 31 | 32 | - **Pattern**: Strategy Pattern (Recommended) 33 | - **Context**: The system needs to parse and index files from multiple programming languages (Python, C#, Java, JavaScript), each requiring specific logic. 34 | - **Solution**: Define a common interface for language processors (parsers/indexers). Implement concrete strategies for each supported language. A central orchestrator or factory selects the appropriate strategy based on file type or language detection. 35 | - **Characteristics**: 36 | - Encapsulates language-specific logic. 37 | - Easily extensible to support new languages without modifying core components. 38 | - Promotes separation of concerns. 
39 | -------------------------------------------------------------------------------- /parser/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tree_sitter import Language, Parser 3 | import tree_sitter_python 4 | import tree_sitter_c_sharp 5 | import tree_sitter_typescript 6 | 7 | # Initialize Tree-sitter languages 8 | PYTHON_LANGUAGE = None 9 | CSHARP_LANGUAGE = None 10 | TYPESCRIPT_LANGUAGE = None 11 | TSX_LANGUAGE = None 12 | 13 | def initialize_grammars(): 14 | """Initialize Tree-sitter language grammars""" 15 | global PYTHON_LANGUAGE, CSHARP_LANGUAGE, TYPESCRIPT_LANGUAGE, TSX_LANGUAGE 16 | 17 | try: 18 | PYTHON_LANGUAGE = Language(tree_sitter_python.language()) 19 | except Exception as e: 20 | raise RuntimeError(f"Failed to load Python grammar: {e}") 21 | 22 | try: 23 | CSHARP_LANGUAGE = Language(tree_sitter_c_sharp.language()) 24 | except Exception as e: 25 | raise RuntimeError(f"Failed to load C# grammar: {e}") 26 | 27 | try: 28 | TYPESCRIPT_LANGUAGE = Language(tree_sitter_typescript.language_typescript()) 29 | except Exception as e: 30 | raise RuntimeError(f"Failed to load TypeScript grammar: {e}") 31 | 32 | try: 33 | TSX_LANGUAGE = Language(tree_sitter_typescript.language_tsx()) 34 | except Exception as e: 35 | raise RuntimeError(f"Failed to load TSX grammar: {e}") 36 | 37 | def grammars_loaded(): 38 | """Check if grammars are loaded""" 39 | return (PYTHON_LANGUAGE is not None and 40 | CSHARP_LANGUAGE is not None and 41 | TYPESCRIPT_LANGUAGE is not None and 42 | TSX_LANGUAGE is not None) 43 | 44 | # Initialize grammars when module is imported 45 | initialize_grammars() 46 | 47 | # Import parser functions 48 | from .parser import * 49 | from .python_parser import * 50 | from .csharp_parser import * 51 | from .typescript_parser import * # Added for future TypeScript parser -------------------------------------------------------------------------------- /parser/csharp_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tree_sitter import Parser, Query 3 | from . import CSHARP_LANGUAGE 4 | 5 | # Query definitions as class-level constants 6 | CLASS_QUERY_STR = """ 7 | (class_declaration 8 | name: (identifier) @class_name 9 | (base_list)? @bases 10 | body: (declaration_list) @class_body) @class_def 11 | """ 12 | 13 | STRUCT_QUERY_STR = """ 14 | (struct_declaration 15 | name: (identifier) @struct_name 16 | (base_list)? @bases 17 | body: (declaration_list) @struct_body) @struct_def 18 | """ 19 | 20 | INTERFACE_QUERY_STR = """ 21 | (interface_declaration 22 | name: (identifier) @interface_name 23 | (base_list)? @bases 24 | body: (declaration_list) @interface_body) @interface_def 25 | """ 26 | 27 | ENUM_QUERY_STR = """ 28 | (enum_declaration 29 | name: (identifier) @enum_name 30 | body: (enum_member_declaration_list) @enum_body) @enum_def 31 | """ 32 | 33 | METHOD_QUERY_STR = """ 34 | (method_declaration 35 | (type)? @return_type 36 | name: (identifier) @method_name 37 | (parameter_list) @params 38 | body: (block)? 
@method_body) @method_def 39 | """ 40 | 41 | class C_Sharp_Result: 42 | def __init__(self): 43 | self.classes = [] 44 | self.structs = [] 45 | self.interfaces = [] 46 | self.enums = [] 47 | 48 | def __to_dict__(self): 49 | result = {} 50 | if self.classes: 51 | result['classes'] = self.classes 52 | if self.structs: 53 | result['structs'] = self.structs 54 | if self.interfaces: 55 | result['interfaces'] = self.interfaces 56 | if self.enums: 57 | result['enums'] = self.enums 58 | return result 59 | 60 | def process_method_node(method_node): 61 | """Process a method node and extract its information. 62 | 63 | Args: 64 | method_node: The tree-sitter node representing a method 65 | 66 | Returns: 67 | dict: Method information including name, parameters, return type and modifiers 68 | """ 69 | if not method_node: 70 | return None 71 | 72 | method_info = {} 73 | method_info['name'] = method_node.child_by_field_name('name').text.decode('utf8') 74 | 75 | raw_parameters = method_node.child_by_field_name('parameters') 76 | parameters = ','.join([p.text.decode('utf8') for p in raw_parameters.children if p.type == 'parameter']) 77 | if len(parameters) > 0: 78 | method_info['parameters'] = parameters 79 | 80 | type = method_node.child_by_field_name('type') 81 | if type: 82 | method_info['return_type'] = type.text.decode('utf8') 83 | 84 | modifiers_node = method_node.child_by_field_name('modifiers') 85 | if modifiers_node: 86 | method_info['modifiers'] = [m.text.decode('utf8') for m in modifiers_node.children] 87 | return method_info 88 | 89 | def _should_skip_file(file_path: str) -> bool: 90 | """Check if the file should be skipped based on its extension. 91 | 92 | Args: 93 | file_path: Path to the file 94 | 95 | Returns: 96 | bool: True if the file should be skipped, False otherwise 97 | """ 98 | file_extension_supported = file_path.endswith(".cs") or file_path.endswith(".h") 99 | return not file_extension_supported or 'node_modules' in file_path or 'dist' in file_path or 'build' in file_path 100 | 101 | def _read_and_validate_file(file_path: str) -> str: 102 | """Read and validate a C# source file. 103 | 104 | Args: 105 | file_path: Path to the C# file 106 | 107 | Returns: 108 | str: The file content if valid, None otherwise 109 | """ 110 | try: 111 | if _should_skip_file(file_path): 112 | return None 113 | 114 | with open(file_path, 'r', encoding='utf-8') as f: 115 | source_code = f.read() 116 | 117 | return source_code if source_code else None 118 | 119 | except Exception as e: 120 | print(f"Error reading file {file_path}: {str(e)}") 121 | return None 122 | 123 | def _initialize_parser(source_code: str) -> tuple: 124 | """Initialize the tree-sitter parser and parse the source code. 125 | 126 | Args: 127 | source_code: The C# source code to parse 128 | 129 | Returns: 130 | tuple: (Parser, Tree) objects 131 | """ 132 | parser = Parser(language=CSHARP_LANGUAGE) 133 | tree = parser.parse(bytes(source_code, 'utf8')) 134 | return parser, tree 135 | 136 | def _process_class(struct_node, method_query, result): 137 | """Process a class node and extract its information. 
138 | 139 | Args: 140 | class_node: The tree-sitter node representing a class 141 | method_query: The method query object 142 | result: The C_Sharp_Result object to populate 143 | 144 | Returns: 145 | dict: Class information including name, bases and methods 146 | """ 147 | class_info = { 148 | 'name': struct_node.child_by_field_name('name').text.decode('utf8') 149 | } 150 | 151 | bases_node = struct_node.child_by_field_name('bases') 152 | if bases_node: 153 | bases = [b.text.decode('utf8') for b in bases_node.children if b.type != ':'] 154 | class_info['bases'] = "".join(bases) 155 | 156 | methods = [] 157 | body_node = struct_node.child_by_field_name('body') 158 | for _, method_nodes_dict in method_query.matches(body_node): 159 | method_node = method_nodes_dict['method_def'][0] 160 | method_info = process_method_node(method_node) 161 | methods.append(method_info) 162 | 163 | if methods: 164 | class_info['methods'] = methods 165 | 166 | return class_info 167 | 168 | def _process_struct(struct_node, method_query, result): 169 | """Process a struct node and extract its information. 170 | 171 | Args: 172 | struct_node: The tree-sitter node representing a struct 173 | method_query: The method query object 174 | result: The C_Sharp_Result object to populate 175 | 176 | Returns: 177 | dict: Struct information including name and methods 178 | """ 179 | struct_info = { 180 | 'name': struct_node.child_by_field_name('name').text.decode('utf8') 181 | } 182 | 183 | methods = [] 184 | body_node = struct_node.child_by_field_name('body') 185 | for _, method_nodes_dict in method_query.matches(body_node): 186 | method_node = method_nodes_dict['method_def'][0] 187 | method_info = process_method_node(method_node) 188 | methods.append(method_info) 189 | 190 | if methods: 191 | struct_info['methods'] = methods 192 | 193 | return struct_info 194 | 195 | def _process_interface(interface_node): 196 | """Process an interface node and extract its name. 197 | 198 | Args: 199 | interface_node: The tree-sitter node representing an interface 200 | 201 | Returns: 202 | dict: Interface information with name 203 | """ 204 | return { 205 | 'name': interface_node.child_by_field_name('name').text.decode('utf8') 206 | } 207 | 208 | def _process_enum(enum_node): 209 | """Process an enum node and extract its name. 210 | 211 | Args: 212 | enum_node: The tree-sitter node representing an enum 213 | 214 | Returns: 215 | dict: Enum information with name 216 | """ 217 | return { 218 | 'name': enum_node.child_by_field_name('name').text.decode('utf8') 219 | } 220 | 221 | def extract_types_and_members_from_file_for_csharp(file_path: str) -> C_Sharp_Result: 222 | """Extract types and members from a C# source file. 
223 | 224 | Args: 225 | file_path: Path to the C# file 226 | 227 | Returns: 228 | C_Sharp_Result: Object containing all extracted types and members 229 | """ 230 | result = C_Sharp_Result() 231 | 232 | # Read and validate file 233 | source_code = _read_and_validate_file(file_path) 234 | if not source_code: 235 | return result 236 | 237 | # Initialize parser and parse source code 238 | parser, tree = _initialize_parser(source_code) 239 | 240 | # Create queries 241 | class_query = CSHARP_LANGUAGE.query(CLASS_QUERY_STR).matches(tree.root_node) 242 | struct_query = Query(CSHARP_LANGUAGE, STRUCT_QUERY_STR).matches(tree.root_node) 243 | interface_query = Query(CSHARP_LANGUAGE, INTERFACE_QUERY_STR).matches(tree.root_node) 244 | enum_query = Query(CSHARP_LANGUAGE, ENUM_QUERY_STR).matches(tree.root_node) 245 | # this one doesn't match the root node directly, because the query is for methods inside classes/structs as well 246 | # as top-level methods 247 | method_query = Query(CSHARP_LANGUAGE, METHOD_QUERY_STR) 248 | 249 | # Process classes 250 | for _, class_nodes_dict in class_query: 251 | class_node = class_nodes_dict['class_def'][0] 252 | class_info = _process_class(class_node, method_query, result) 253 | result.classes.append(class_info) 254 | 255 | # Process structs 256 | for _, struct_node_dict in struct_query: 257 | struct_node = struct_node_dict['struct_def'][0] 258 | struct_info = _process_struct(struct_node, method_query, result) 259 | result.structs.append(struct_info) 260 | 261 | # Process interfaces 262 | for _, interface_node_dict in interface_query: 263 | interface_node = interface_node_dict['interface_def'][0] 264 | interface_info = _process_interface(interface_node) 265 | result.interfaces.append(interface_info) 266 | 267 | # Process enums 268 | for _, enum_node_dict in enum_query: 269 | enum_node = enum_node_dict['enum_def'][0] 270 | enum_info = _process_enum(enum_node) 271 | result.enums.append(enum_info) 272 | 273 | return result 274 | 275 | -------------------------------------------------------------------------------- /parser/parser.py: -------------------------------------------------------------------------------- 1 | from parser.csharp_parser import extract_types_and_members_from_file_for_csharp 2 | from parser.python_parser import extract_types_and_members_from_file_for_python 3 | from .typescript_parser import extract_types_and_members_from_file_for_typescript -------------------------------------------------------------------------------- /parser/python_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tree_sitter import Parser, Query 3 | from . import PYTHON_LANGUAGE 4 | 5 | class Python_Result: 6 | def __init__(self): 7 | self.py_classes = [] 8 | self.py_functions = [] 9 | self.py_imports = [] 10 | 11 | def __to_dict__(self): 12 | result = {} 13 | if self.py_classes: 14 | result['py_classes'] = self.py_classes 15 | if self.py_functions: 16 | result['py_functions'] = self.py_functions 17 | if self.py_imports: 18 | result['py_imports'] = self.py_imports 19 | return result 20 | 21 | # Tree-sitter queries as class-level constants 22 | CLASS_QUERY = Query(PYTHON_LANGUAGE, """ 23 | (class_definition 24 | name: (identifier) @class_name 25 | body: (block) @class_body) @class_def 26 | """) 27 | 28 | FUNCTION_QUERY = Query(PYTHON_LANGUAGE, """ 29 | (function_definition 30 | name: (identifier) @function_name 31 | parameters: (parameters) @params 32 | return_type: (type)? 
@return_type 33 | body: (block) @function_body) @function_def 34 | """) 35 | 36 | IMPORT_QUERY = Query(PYTHON_LANGUAGE, """ 37 | (import_statement) @import 38 | (import_from_statement) @import_from 39 | """) 40 | 41 | def _should_skip_file(file_path: str) -> bool: 42 | """Check if file should be skipped based on path patterns.""" 43 | return (not file_path.endswith('.py') or 44 | 'venv' in file_path or 45 | '__pycache__' in file_path or 46 | '_test.py' in file_path or 47 | 'test_' in file_path or 48 | 'node_modules' in file_path) 49 | 50 | def _read_source_code(file_path: str) -> str: 51 | """Read and return the contents of a Python source file.""" 52 | with open(file_path, 'r', encoding='utf-8') as f: 53 | return f.read() 54 | 55 | def _process_class(class_node, class_nodes_dict) -> dict: 56 | """Process a class node and return class information.""" 57 | class_info = { 58 | 'name': class_node.child_by_field_name('name').text.decode('utf8') 59 | } 60 | 61 | # Get base classes 62 | bases_node = class_nodes_dict.get('bases', [None])[0] 63 | if bases_node: 64 | class_info['bases'] = [b.text.decode('utf8') for b in bases_node.children 65 | if b.type != '(' and b.type != ')'] 66 | 67 | # Get methods 68 | methods = [] 69 | body_node = class_node.child_by_field_name('body') 70 | for method_index, method_nodes_dict in FUNCTION_QUERY.matches(body_node): 71 | method_node = method_nodes_dict['function_def'][0] 72 | methods.append(_process_function(method_node)) 73 | 74 | if methods: 75 | class_info['methods'] = methods 76 | 77 | return class_info 78 | 79 | def _process_function(function_node) -> str: 80 | """Process a function node and return its signature.""" 81 | func_name = function_node.child_by_field_name('name').text.decode('utf8') 82 | params = function_node.child_by_field_name('parameters').text.decode('utf8') 83 | return_type = function_node.child_by_field_name('return_type') 84 | return_type_str = f" -> {return_type.text.decode('utf8')}" if return_type else " -> None" 85 | 86 | # Get decorators 87 | decorators = [] 88 | if function_node.prev_named_sibling and function_node.prev_named_sibling.type == 'decorator': 89 | decorator_node = function_node.prev_named_sibling 90 | while decorator_node and decorator_node.type == 'decorator': 91 | decorators.append(decorator_node.text.decode('utf8')) 92 | decorator_node = decorator_node.prev_named_sibling 93 | 94 | return f"{' '.join(reversed(decorators))} {func_name}{params}{return_type_str}".strip() 95 | 96 | def _process_imports(tree_root_node, result: Python_Result) -> None: 97 | """Process import statements and add them to the result.""" 98 | for index, import_nodes_dict in IMPORT_QUERY.matches(tree_root_node): 99 | import_node = list(import_nodes_dict.values())[0][0] 100 | result.py_imports.append(import_node.text.decode('utf8')) 101 | 102 | def extract_types_and_members_from_file_for_python(file_path: str, extract_imports: bool = False) -> Python_Result: 103 | """ 104 | Extract Python class, function, and import information from a file. 
105 | 106 | Args: 107 | file_path: Path to the Python file to analyze 108 | extract_imports: Whether to extract import statements (default: False) 109 | 110 | Returns: 111 | Python_Result object containing extracted information 112 | """ 113 | result = Python_Result() 114 | 115 | if _should_skip_file(file_path): 116 | return result 117 | 118 | source_code = _read_source_code(file_path) 119 | parser = Parser(language=PYTHON_LANGUAGE) 120 | tree = parser.parse(bytes(source_code, 'utf8')) 121 | 122 | # Process classes 123 | for index, class_nodes_dict in CLASS_QUERY.matches(tree.root_node): 124 | class_node = class_nodes_dict['class_def'][0] 125 | result.py_classes.append(_process_class(class_node, class_nodes_dict)) 126 | 127 | # Process top-level functions 128 | for index, function_nodes_dict in FUNCTION_QUERY.matches(tree.root_node): 129 | function_node = function_nodes_dict['function_def'][0] 130 | if (function_node.parent and 131 | function_node.parent.type == 'class_definition'): 132 | continue 133 | 134 | result.py_functions.append(_process_function(function_node)) 135 | 136 | # Process imports if requested 137 | if extract_imports: 138 | _process_imports(tree.root_node, result) 139 | 140 | return result -------------------------------------------------------------------------------- /parser/typescript_parser.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tree_sitter 3 | from . import TYPESCRIPT_LANGUAGE, TSX_LANGUAGE 4 | from typing import List, Dict, Any, Optional 5 | 6 | class TypeScript_Result: 7 | """Holds extracted data from a TypeScript/TSX file.""" 8 | def __init__(self): 9 | self.classes: List[Dict[str, Any]] = [] 10 | self.interfaces: List[Dict[str, Any]] = [] 11 | self.functions: List[Dict[str, Any]] = [] 12 | self.enums: List[Dict[str, Any]] = [] 13 | self.imports: List[Dict[str, Any]] = [] 14 | 15 | def __to_dict__(self): 16 | """Converts the result object to a dictionary.""" 17 | # remove the start and end lines 18 | result_json = {} 19 | if self.classes: 20 | result_json['classes'] = self.classes 21 | if self.interfaces: 22 | result_json['interfaces'] = [interface.get("name") for interface in self.interfaces] 23 | if self.functions: 24 | result_json['functions'] = [f['function_signature'] for f in self.functions] 25 | if self.enums: 26 | result_json['enums'] = self.enums 27 | if self.imports: 28 | result_json['imports'] = self.imports 29 | return result_json 30 | 31 | # Tree-sitter queries for TypeScript/TSX 32 | QUERIES = { 33 | "imports": """ 34 | (import_statement 35 | (import_clause 36 | (named_imports 37 | (import_specifier 38 | name: (identifier) @import.name))? 39 | (namespace_import (identifier) @import.namespace)? 40 | (identifier)? 
@import.default) ; Matches default import like 'import React from "react"' 41 | source: (string) @import.source) 42 | 43 | (import_statement 44 | source: (string) @import.source) ; Matches side-effect imports like 'import "./styles.css"' 45 | 46 | (import_statement 47 | (import_clause (identifier) @import.default) ; Matches 'import defaultExport from "module-name";' 48 | source: (string) @import.source) 49 | """, 50 | "classes": """ 51 | (class_declaration 52 | name: (type_identifier) @class.name 53 | body: (class_body) @class.body) 54 | 55 | (export_statement 56 | declaration: (class_declaration 57 | name: (type_identifier) @class.name 58 | body: (class_body) @class.body)) 59 | """, 60 | "interfaces": """ 61 | (interface_declaration 62 | name: (type_identifier) @interface.name 63 | body: (interface_body) @interface.body) 64 | 65 | (export_statement 66 | (interface_declaration 67 | name: (type_identifier) @interface.name 68 | body: (interface_body) @interface.body)) 69 | """, 70 | "functions": """ 71 | (function_declaration 72 | name: (identifier) @function.name 73 | parameters: (formal_parameters) @function.parameters 74 | return_type: (_)? @function.return_type 75 | body: (statement_block) @function.body) 76 | 77 | (export_statement 78 | declaration: (function_declaration 79 | name: (identifier) @function.name 80 | parameters: (formal_parameters) @function.parameters 81 | return_type: (_)? @function.return_type 82 | body: (statement_block) @function.body)) 83 | 84 | (method_definition 85 | name: (property_identifier) @function.name ; Treat methods as functions for now 86 | parameters: (formal_parameters) @function.parameters 87 | return_type: (_)? @function.return_type 88 | body: (statement_block) @function.body) 89 | 90 | ; Arrow functions assigned to variables (const myFunction = () => {}) 91 | (lexical_declaration 92 | (variable_declarator 93 | name: (identifier) @function.name 94 | value: (arrow_function 95 | parameters: (formal_parameters)? @function.parameters 96 | return_type: (_)? @function.return_type 97 | body: (_) @function.body))) 98 | 99 | (export_statement 100 | declaration: (lexical_declaration 101 | (variable_declarator 102 | name: (identifier) @function.name 103 | value: (arrow_function 104 | parameters: (formal_parameters)? @function.parameters 105 | return_type: (_)? 
@function.return_type 106 | body: (_) @function.body)))) 107 | """, 108 | "enums": """ 109 | (enum_declaration 110 | name: (identifier) @enum.name 111 | body: (enum_body) @enum.body) 112 | 113 | (export_statement 114 | declaration: (enum_declaration 115 | name: (identifier) @enum.name 116 | body: (enum_body) @enum.body)) 117 | """ 118 | } 119 | 120 | def _should_skip_file(file_path: str) -> bool: 121 | """Check if file should be skipped based on path patterns.""" 122 | return (not file_path.endswith('.ts') and not file_path.endswith('.tsx') or 123 | 'node_modules' in file_path or 124 | '__tests__' in file_path or 125 | 'test_' in file_path or 126 | 'test.tsx' in file_path or 127 | 'test.ts' in file_path) 128 | 129 | def _get_node_text(node: tree_sitter.Node) -> str: 130 | """Safely decode node text.""" 131 | return node.text.decode('utf8').replace("\n","").replace(" ","") if node else "" 132 | 133 | def _process_import(capture: Dict[str, tree_sitter.Node]) -> Dict[str, Any]: 134 | """Processes an import capture.""" 135 | source = _get_node_text(capture.get("import.source")).strip('"\'') 136 | imported_items = [] 137 | if "import.name" in capture: 138 | imported_items.append(_get_node_text(capture["import.name"])) 139 | if "import.namespace" in capture: 140 | imported_items.append(f"* as {_get_node_text(capture['import.namespace'])}") 141 | if "import.default" in capture: 142 | # Check if it's a default import name or part of named imports 143 | default_node = capture["import.default"] 144 | # Simple heuristic: if parent is import_clause and it's the first named child, it's likely the default import 145 | if default_node.parent and default_node.parent.type == 'import_clause' and default_node.prev_sibling is None: 146 | imported_items.append(f"default as {_get_node_text(default_node)}") 147 | elif default_node.type == 'identifier' and 'import.name' not in capture and 'import.namespace' not in capture: 148 | # Handles cases like 'import defaultExport from "module-name";' 149 | imported_items.append(f"default as {_get_node_text(default_node)}") 150 | 151 | 152 | return { 153 | "source": source, 154 | "imported_items": imported_items if imported_items else ["*"], # For side-effect imports or if logic fails 155 | "start_line": capture.get("import.source", list(capture.values())[0]).start_point[0] + 1, 156 | "end_line": capture.get("import.source", list(capture.values())[0]).end_point[0] + 1, 157 | } 158 | 159 | 160 | def _process_class(capture: Dict[str, tree_sitter.Node]) -> Dict[str, Any]: 161 | """Processes a class capture.""" 162 | name_node = capture.get("class.name") 163 | body_node = capture.get("class.body") 164 | return { 165 | "name": _get_node_text(name_node), 166 | "start_line": name_node.start_point[0] + 1 if name_node else 0, 167 | "end_line": body_node.end_point[0] + 1 if body_node else 0, 168 | } 169 | 170 | def _process_interface(capture: Dict[str, tree_sitter.Node]) -> Dict[str, Any]: 171 | """Processes an interface capture.""" 172 | name_node = capture.get("interface.name") 173 | body_node = capture.get("interface.body") 174 | return { 175 | "name": _get_node_text(name_node), 176 | "start_line": name_node.start_point[0] + 1 if name_node else 0, 177 | "end_line": body_node.end_point[0] + 1 if body_node else 0, 178 | } 179 | 180 | def _process_function(capture: Dict[str, tree_sitter.Node]) -> Dict[str, Any] | None: 181 | """Processes a function or method capture.""" 182 | name_node = capture.get("function.name") 183 | if not name_node: 184 | return None 185 | params_node = 
capture.get("function.parameters") 186 | body_node = capture.get("function.body") 187 | return_type_node = capture.get("function.return_type") 188 | 189 | # Determine start and end lines carefully 190 | start_node = name_node if name_node else list(capture.values())[0] # Fallback to first node 191 | end_node = body_node if body_node else start_node # Fallback to start node if no body 192 | 193 | name = _get_node_text(name_node) 194 | parameters = _get_node_text(params_node) 195 | return_type = _get_node_text(return_type_node.child(1)) if return_type_node and return_type_node.child_count > 1 else _get_node_text(return_type_node) # Attempt to get type after ':' 196 | 197 | function_signature = f"{name}{parameters}" + (f": {return_type}" if return_type else "") 198 | return { 199 | "function_signature":function_signature, 200 | "start_line": start_node.start_point[0] + 1, 201 | "end_line": end_node.end_point[0] + 1, 202 | } 203 | 204 | def _process_enum(capture: Dict[str, tree_sitter.Node]) -> Dict[str, Any]: 205 | """Processes an enum capture.""" 206 | name_node = capture.get("enum.name") 207 | body_node = capture.get("enum.body") 208 | return { 209 | "name": _get_node_text(name_node), 210 | "start_line": name_node.start_point[0] + 1 if name_node else 0, 211 | "end_line": body_node.end_point[0] + 1 if body_node else 0, 212 | } 213 | 214 | 215 | def extract_types_and_members_from_file_for_typescript(file_path: str, extract_imports: bool = False) -> TypeScript_Result: 216 | """ 217 | Parses a TypeScript or TSX file and extracts structural information. 218 | 219 | Args: 220 | file_path: The path to the TypeScript/TSX file. 221 | extract_imports: Whether to extract import statements. 222 | 223 | Returns: 224 | A TypeScript_Result object containing the extracted data. 
225 | """ 226 | result = TypeScript_Result() 227 | file_extension = os.path.splitext(file_path)[1].lower() 228 | 229 | print(f"Parsing file: {file_path} with extension {file_extension}") 230 | 231 | if _should_skip_file(file_path): 232 | return result 233 | 234 | language = TSX_LANGUAGE if file_extension == ".tsx" else TYPESCRIPT_LANGUAGE 235 | if not language: 236 | print(f"Tree-sitter language for {file_extension} not available.") 237 | return result # Should not happen if __init__ is correct 238 | 239 | parser = tree_sitter.Parser(language=language) 240 | 241 | with open(file_path, "rb") as file: 242 | source_code = file.read() 243 | 244 | tree = parser.parse(source_code) 245 | root_node = tree.root_node 246 | 247 | processing_map = { 248 | "classes": (_process_class, result.classes), 249 | "interfaces": (_process_interface, result.interfaces), 250 | "functions": (_process_function, result.functions), 251 | "enums": (_process_enum, result.enums), 252 | } 253 | if extract_imports: 254 | processing_map["imports"] = (_process_import, result.imports) 255 | 256 | 257 | for query_name, (process_func, result_list) in processing_map.items(): 258 | query_string = QUERIES.get(query_name) 259 | if not query_string: 260 | continue 261 | 262 | query = language.query(query_string) 263 | captures: dict[str, list[tree_sitter.Node]] = query.captures(root_node) 264 | 265 | # Process captures, grouping by the start line of the primary node 266 | processed_captures = {} 267 | capture_items = captures.items() 268 | for capture_name, node_list in capture_items: 269 | # Use the start line of the node that defines the item (e.g., class name, function name) 270 | # Heuristic: Use the first node in the capture group if specific name isn't found 271 | for node in node_list: 272 | primary_node_key = f"{query_name}.name" if f"{query_name}.name" in QUERIES[query_name] else capture_name.split('.')[0] + '.' + capture_name.split('.')[1] # e.g. import.source 273 | start_line = node.start_point[0] 274 | 275 | # Find the most relevant node for the start line key 276 | relevant_node_for_key = node 277 | temp_captures = [] 278 | for cn, n_list in capture_items: 279 | # Check if the node is on the same line as the primary node 280 | for n in n_list: 281 | if n.start_point[0] == start_line: 282 | temp_captures.append((n, cn)) 283 | 284 | for n_temp, cn_temp in temp_captures: 285 | if cn_temp == primary_node_key: 286 | relevant_node_for_key = n_temp 287 | break 288 | # Fallback for imports where 'source' is key 289 | if query_name == "imports" and cn_temp == "import.source": 290 | relevant_node_for_key = n_temp 291 | # Don't break, maybe find a name later 292 | 293 | key = relevant_node_for_key.start_point[0] # Group by the start line of the defining node 294 | 295 | if key not in processed_captures: 296 | processed_captures[key] = {} 297 | processed_captures[key][capture_name] = node 298 | 299 | # Apply processing function to grouped captures 300 | for key in sorted(processed_captures.keys()): 301 | processed_item = process_func(processed_captures[key]) 302 | if processed_item: # Ensure item was processed correctly 303 | # Avoid duplicates based on name and start line for functions/classes etc. 
304 | is_duplicate = False 305 | if query_name != "imports": # Imports can have same source 306 | for existing_item in result_list: 307 | if existing_item.get("name") == processed_item.get("name") and \ 308 | existing_item.get("start_line") == processed_item.get("start_line"): 309 | is_duplicate = True 310 | break 311 | if not is_duplicate: 312 | processed_item.pop("start_line", None) 313 | processed_item.pop("end_line", None) 314 | result_list.append(processed_item) 315 | return result 316 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tree-sitter==0.24.0 2 | tree-sitter-python==0.23.6 3 | tree-sitter-c-sharp==0.23.1 4 | tree-sitter-javascript==0.23.1 5 | tree-sitter-typescript==0.23.2 6 | -------------------------------------------------------------------------------- /test/benchmark_parser.py: -------------------------------------------------------------------------------- 1 | import time 2 | from parser.csharp_parser import extract_types_and_members_from_file_for_csharp as tree_sitter_parse 3 | from parser.csharp_parser import C_Sharp_Result as TreeSitterResult 4 | 5 | # Backup old implementation for comparison 6 | def regex_parse(file_path: str) -> TreeSitterResult: 7 | import re 8 | patterns_csharp = { 9 | 'classes': re.compile(r'\bclass\s+(\w+)(?=\s*[:{])'), 10 | 'structs': re.compile(r'\bstruct\s+(\w+)(?=\s*[:{])'), 11 | 'interfaces': re.compile(r'\binterface\s+(\w+)(?=\s*[:{])'), 12 | 'enums': re.compile(r'\benum\s+(\w+)(?=\s*[{])'), 13 | 'methods': re.compile(r'\b(?:public|private|protected|internal)\s+(?:static\s+)?(?:[\w\<\>\[\]]+\s+)+(\w+)\s*\([^)]*\)\s*(?=\{|=>)') 14 | } 15 | 16 | results = TreeSitterResult() 17 | with open(file_path, 'r', encoding='utf-8') as file: 18 | content = file.read() 19 | for key, pattern in patterns_csharp.items(): 20 | matches = pattern.findall(content) 21 | if not matches: 22 | continue 23 | for match in matches: 24 | getattr(results, key).append(match) 25 | return results 26 | 27 | def benchmark_parser(parser_func, file_path: str, iterations: int = 100): 28 | start_time = time.time() 29 | 30 | # Warm up 31 | for _ in range(5): 32 | parser_func(file_path) 33 | 34 | # Time individual runs 35 | times = [] 36 | for _ in range(iterations): 37 | start = time.perf_counter() 38 | parser_func(file_path) 39 | end = time.perf_counter() 40 | times.append(end - start) 41 | 42 | total_time = time.time() - start_time 43 | avg_time = sum(times) / iterations 44 | min_time = min(times) 45 | max_time = max(times) 46 | 47 | return { 48 | 'total_time': total_time, 49 | 'avg_time': avg_time, 50 | 'min_time': min_time, 51 | 'max_time': max_time, 52 | 'iterations': iterations 53 | } 54 | 55 | if __name__ == '__main__': 56 | test_file = 'resources/test.cs' 57 | iterations = 100 58 | 59 | print("Benchmarking Tree-sitter parser...") 60 | ts_results = benchmark_parser(tree_sitter_parse, test_file, iterations) 61 | print(f"Tree-sitter results: {ts_results}") 62 | 63 | print("\nBenchmarking Regex parser...") 64 | regex_results = benchmark_parser(regex_parse, test_file, iterations) 65 | print(f"Regex results: {regex_results}") 66 | 67 | print("\nComparison:") 68 | print(f"Tree-sitter is {regex_results['avg_time'] / ts_results['avg_time']:.2f}x faster than Regex") 69 | print(f"Tree-sitter parsed {iterations} files in {ts_results['total_time']:.4f}s") 70 | print(f"Regex parsed {iterations} files in {regex_results['total_time']:.4f}s") 71 | 
72 | # Print actual parsing results for verification 73 | print("\nTree-sitter parse results:") 74 | print(tree_sitter_parse(test_file).__to_dict__()) 75 | 76 | print("\nRegex parse results:") 77 | print(regex_parse(test_file).__to_dict__()) -------------------------------------------------------------------------------- /test/resources/test.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace TestProject 4 | { 5 | public interface IExample 6 | { 7 | void InterfaceMethod(); 8 | } 9 | 10 | public abstract class BaseClass 11 | { 12 | public abstract void AbstractMethod(); 13 | } 14 | 15 | public class ExampleClass : BaseClass, IExample 16 | { 17 | private int _privateField; 18 | public string PublicProperty { get; set; } 19 | 20 | public const double PI = 3.14; 21 | 22 | public override void AbstractMethod() 23 | { 24 | Console.WriteLine("Implemented abstract method"); 25 | } 26 | 27 | public void InterfaceMethod() 28 | { 29 | Console.WriteLine("Implemented interface method"); 30 | } 31 | 32 | public string GetGreeting(string name) 33 | { 34 | return $"Hello, {name}"; 35 | } 36 | 37 | private void PrivateMethod() 38 | { 39 | // Do something private 40 | } 41 | } 42 | 43 | public struct Point 44 | { 45 | public int X; 46 | public int Y; 47 | } 48 | 49 | public enum Status 50 | { 51 | Active, 52 | Inactive, 53 | Pending 54 | } 55 | } -------------------------------------------------------------------------------- /test/resources/test.ts: -------------------------------------------------------------------------------- 1 | // Sample TypeScript file for testing the parser 2 | 3 | import { AnotherClass } from "./another-module"; 4 | import * as fs from "fs"; 5 | 6 | // Simple interface 7 | interface Person { 8 | name: string; 9 | age: number; 10 | greet?(): void; // Optional method 11 | } 12 | 13 | // Enum definition 14 | export enum Color { 15 | Red, 16 | Green, 17 | Blue = 5, // Explicit value 18 | } 19 | 20 | // Class definition 21 | class SampleClass extends AnotherClass implements Person { 22 | public name: string; 23 | readonly age: number; 24 | private secret: string; 25 | protected status: Color; 26 | 27 | constructor(name: string, age: number) { 28 | super(); 29 | this.name = name; 30 | this.age = age; 31 | this.secret = "shhh"; 32 | this.status = Color.Green; 33 | } 34 | 35 | // Public method 36 | public greet(): void { 37 | console.log( 38 | `Hello, my name is ${this.name} and I am ${this.age} years old.` 39 | ); 40 | } 41 | 42 | // Private method 43 | private revealSecret(): string { 44 | return this.secret; 45 | } 46 | 47 | // Static method 48 | static createDefault(): SampleClass { 49 | return new SampleClass("Default", 0); 50 | } 51 | 52 | // Async method 53 | async loadData(path: string): Promise { 54 | return new Promise((resolve, reject) => { 55 | fs.readFile(path, "utf8", (err, data) => { 56 | if (err) { 57 | reject(err); 58 | } else { 59 | resolve(data); 60 | } 61 | }); 62 | }); 63 | } 64 | } 65 | 66 | // Top-level function 67 | function add(x: number, y: number): number { 68 | return x + y; 69 | } 70 | 71 | // Arrow function 72 | const multiply = (a: number, b: number): number => a * b; 73 | 74 | // Exporting variables 75 | export const PI = 3.14159; 76 | export let version = "1.0.0"; 77 | 78 | // Type alias 79 | type StringOrNumber = string | number; 80 | 81 | let value: StringOrNumber = "hello"; 82 | value = 123; 83 | 84 | // Using the enum 85 | let myColor: Color = Color.Blue; 86 | 87 | // Using the class 
88 | const personInstance = new SampleClass("Alice", 30); 89 | personInstance.greet(); 90 | const defaultPerson = SampleClass.createDefault(); 91 | 92 | console.log(add(5, 3)); 93 | console.log(multiply(4, 6)); 94 | console.log(`Color: ${Color[myColor]}, Value: ${myColor}`); 95 | -------------------------------------------------------------------------------- /test/resources/test.tsx: -------------------------------------------------------------------------------- 1 | // Sample TSX file for testing the parser 2 | 3 | import React, { useState, useEffect } from "react"; 4 | import { SampleClass } from "./test"; // Assuming test.ts exports SampleClass 5 | 6 | // Interface for component props 7 | interface MyComponentProps { 8 | title: string; 9 | initialCount?: number; 10 | } 11 | 12 | // Functional component with hooks and JSX 13 | const MyComponent: React.FC = ({ 14 | title, 15 | initialCount = 0, 16 | }) => { 17 | const [count, setCount] = useState(initialCount); 18 | const [data, setData] = useState(null); 19 | 20 | useEffect(() => { 21 | // Simulate fetching data 22 | const instance = new SampleClass("TSX Component", count); 23 | setData(instance); 24 | console.log("Component mounted or count updated"); 25 | 26 | return () => { 27 | console.log("Component will unmount or count changed"); 28 | }; 29 | }, [count]); // Dependency array 30 | 31 | const increment = () => setCount((prevCount) => prevCount + 1); 32 | const decrement = () => setCount((prevCount) => prevCount - 1); 33 | 34 | return ( 35 |
<div
36 |       className="my-component"
37 |       data-count={count}
38 |     >
39 |       <h1>{title}</h1>
40 |       <p>Current Count: {count}</p>
41 |       {data && <p>Data Name: {data.name}</p>}
42 |       <button onClick={increment}>Increment</button>
43 |       <button onClick={decrement}>Decrement</button>
44 |       {/* Self-closing tag */}
45 |       <hr />
46 |       {/* Fragment */}
47 |       <>
48 |         <span>Fragment content</span>
49 |       </>
50 |       {/* Conditional rendering */}
51 |       {count > 5 && <p>Count is greater than 5!</p>}
52 |     </div>
53 |   );
54 | };
55 |
56 | // Another simple component
57 | function SimpleDiv() {
58 |   return <div>Just a simple div.</div>
; 59 | } 60 | 61 | // Exporting the component 62 | export default MyComponent; 63 | export { SimpleDiv }; 64 | 65 | // Top-level variable using JSX type 66 | let element: JSX.Element = ; 67 | -------------------------------------------------------------------------------- /test/test_parser.py: -------------------------------------------------------------------------------- 1 | import time 2 | from parser.python_parser import extract_types_and_members_from_file_for_python 3 | from parser.csharp_parser import extract_types_and_members_from_file_for_csharp 4 | from parser.typescript_parser import extract_types_and_members_from_file_for_typescript 5 | 6 | def test_python_parser(file_path): 7 | print(f"\nTesting Python parser on: {file_path}") 8 | 9 | start_time = time.time() 10 | result = extract_types_and_members_from_file_for_python(file_path, extract_imports=True) 11 | elapsed_time = time.time() - start_time 12 | 13 | print(f"\nParsing completed in {elapsed_time:.4f} seconds") 14 | 15 | print("\nClasses found:") 16 | for cls in result.py_classes: 17 | print(f"- {cls['name']}") 18 | if 'bases' in cls: 19 | print(f" Inherits from: {', '.join(cls['bases'])}") 20 | if 'methods' in cls: 21 | print(f" Methods: {len(cls['methods'])}") 22 | for method in cls['methods']: 23 | print(f" - {method}") 24 | 25 | 26 | def test_csharp_parser(file_path): 27 | print(f"\nTesting C# parser on: {file_path}") 28 | 29 | start_time = time.time() 30 | result = extract_types_and_members_from_file_for_csharp(file_path) 31 | elapsed_time = time.time() - start_time 32 | 33 | print(f"\nParsing completed in {elapsed_time:.4f} seconds") 34 | 35 | print("\nClasses found:") 36 | for cls in result.classes: 37 | print(f"- {cls['name']}") 38 | if 'bases' in cls: 39 | print(f" Inherits from: {cls['bases']}") 40 | if 'methods' in cls: 41 | print(f" Methods: {len(cls['methods'])}") 42 | for method in cls['methods']: 43 | print(f" - {method['name']}()") 44 | if 'modifiers' in method: 45 | print(f" Modifiers: {', '.join(method['modifiers'])}") 46 | 47 | print("Raw dictionary:") 48 | print(result.__to_dict__()) 49 | 50 | 51 | def test_typescript_parser(file_path): 52 | print(f"\nTesting TypeScript/TSX parser on: {file_path}") 53 | 54 | start_time = time.time() 55 | result = extract_types_and_members_from_file_for_typescript(file_path, extract_imports=True) 56 | elapsed_time = time.time() - start_time 57 | 58 | print(f"\nParsing completed in {elapsed_time:.4f} seconds") 59 | 60 | print("\nImports found:") 61 | for imp in result.imports: 62 | print(f"- {imp}") 63 | 64 | print("\nInterfaces found:") 65 | for iface in result.interfaces: 66 | print(f"- {iface['name']}") 67 | # Add more detail printing if needed 68 | 69 | print("\nEnums found:") 70 | for enm in result.enums: 71 | print(f"- {enm['name']}") 72 | # Add more detail printing if needed 73 | 74 | print("\nClasses found:") 75 | for cls in result.classes: 76 | print(f"- {cls['name']}") 77 | if 'heritage' in cls: 78 | print(f" Heritage: {cls['heritage']}") 79 | if 'methods' in cls: 80 | print(f" Methods: {len(cls['methods'])}") 81 | for method in cls['methods']: 82 | print(f" - {method['name']}") # Basic method name 83 | # Add more detail printing if needed 84 | 85 | print("\nFunctions found:") 86 | for func in result.functions: 87 | print(f"- {func['function_signature']}") 88 | # Add more detail printing if needed 89 | 90 | print("\nRaw dictionary:") 91 | print(result.__to_dict__()) 92 | 93 | 94 | if __name__ == "__main__": 95 | test_python_parser("./Project_Indexer.py") 96 | 
test_python_parser("parser/python_parser.py") 97 | test_csharp_parser("test/resources/test.cs") 98 | test_typescript_parser("test/resources/test.ts") 99 | test_typescript_parser("test/resources/test.tsx") --------------------------------------------------------------------------------