├── img.png
├── .gitignore
├── security.md
├── rails_pipeline
│   ├── __init__.py
│   ├── find_api_definition_files.py
│   ├── definition_swagger_generator.py
│   └── generate_file_information.py
├── nodejs_pipeline
│   ├── constants.py
│   ├── definition_swagger_generator.py
│   ├── find_api_definition_files.py
│   ├── generate_file_information.py
│   ├── run_swagger_generation.py
│   └── identify_api_functions.py
├── requirements.txt
├── .dockerignore
├── python_pipeline
│   ├── definition_swagger_generator.py
│   ├── find_api_definition_files.py
│   ├── identify_api_functions.py
│   ├── run_swagger_generation.py
│   └── generate_file_information.py
├── framework_identifier.py
├── LICENSE
├── config.py
├── Dockerfile
├── golang_pipeline
│   ├── find_api_definition_files.py
│   ├── definition_swagger_generator.py
│   ├── generate_file_information.py
│   ├── identify_api_functions.py
│   └── run_swagger_generation.py
├── .github
│   └── workflows
│       ├── README.md
│       └── docker-build.yml
├── llm_client.py
├── file_scanner.py
├── docker-entrypoint.sh
├── faiss_index_generator.py
├── user_config.py
├── swagger_mcp.py
├── bootstrap_mcp_runner.sh
├── config.yml
├── run.sh
├── endpoints_extractor.py
├── CODE_OF_CONDUCT.md
├── README.md
├── CONTRIBUTING.md
├── utils.py
├── swagger_generation_cli.py
└── swagger_generator.py

/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qodex-ai/apimesh/HEAD/img.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | */__pycache__/
3 | **/__pycache__/
4 | .qodexai/
5 | .DS_Store
6 | .idea/
--------------------------------------------------------------------------------
/security.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 | 
3 | ## Reporting a Vulnerability
4 | 
5 | If you notice a security vulnerability, please let the team know by [sending an email to support@qodex.ai](mailto:support@qodex.ai).
--------------------------------------------------------------------------------
/rails_pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Ruby on Rails specific Swagger generation helpers.
3 | 
4 | The package mirrors the structure and responsibilities of the existing
5 | Node.js and Python generators so that the orchestrator can plug in a
6 | framework-specific implementation with minimal conditional logic.
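
Illustrative sketch (hypothetical wiring; the real orchestrator lives
outside this package, but the module and function names below match this
package's layout):

    from rails_pipeline import find_api_definition_files

    rails_api_files = find_api_definition_files.find_api_definition_files(repo_path)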
7 | """ 8 | 9 | -------------------------------------------------------------------------------- /nodejs_pipeline/constants.py: -------------------------------------------------------------------------------- 1 | SUPPORTED_NODE_FILE_EXTENSIONS = ( 2 | ".js", 3 | ".cjs", 4 | ".mjs", 5 | ".ts", 6 | ".tsx", 7 | ".cts", 8 | ".mts", 9 | ) 10 | 11 | JAVASCRIPT_FILE_EXTENSIONS = (".js", ".cjs", ".mjs") 12 | TYPESCRIPT_FILE_EXTENSIONS = (".ts", ".cts", ".mts") 13 | TSX_FILE_EXTENSIONS = (".tsx",) 14 | 15 | METADATA_DIR_NAME = "qodex_file_information" 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.3.16 2 | langchain-community==0.3.16 3 | langchain-core==0.3.63 4 | langchain-openai==0.3.5 5 | langsmith==0.1.139 6 | openai==1.76.0 7 | tiktoken==0.8.0 8 | faiss-cpu==1.9.0.post1 9 | langchain-text-splitters==0.3.4 10 | pyyaml==6.0.2 11 | numpy<2 12 | tree-sitter==0.25.1 13 | tree-sitter-python==0.23.6 14 | tree-sitter-javascript==0.23.1 15 | tree-sitter-ruby==0.23.1 16 | tree-sitter-go==0.25.0 17 | tree-sitter-typescript==0.23.2 18 | esprima==4.0.1 19 | requests 20 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | .gitattributes 5 | 6 | # Python 7 | __pycache__ 8 | *.pyc 9 | *.pyo 10 | *.pyd 11 | .Python 12 | *.so 13 | *.egg 14 | *.egg-info 15 | dist 16 | build 17 | *.whl 18 | qodexai-virtual-env/ 19 | venv/ 20 | env/ 21 | ENV/ 22 | 23 | # IDE 24 | .vscode/ 25 | .idea/ 26 | *.swp 27 | *.swo 28 | *~ 29 | 30 | # OS 31 | .DS_Store 32 | Thumbs.db 33 | 34 | # Documentation 35 | *.md 36 | !README.md 37 | 38 | # Other 39 | *.log 40 | .pytest_cache/ 41 | .coverage 42 | htmlcov/ 43 | 44 | # User config (will be generated) 45 | .qodexai/ 46 | 47 | # Generated swagger files (users will generate their own) 48 | swagger.json 49 | *.json 50 | 51 | -------------------------------------------------------------------------------- /python_pipeline/definition_swagger_generator.py: -------------------------------------------------------------------------------- 1 | import json 2 | from prompts import python_swagger_prompt 3 | from llm_client import OpenAiClient 4 | 5 | 6 | 7 | def get_function_definition_swagger(function_definition, context, route): 8 | openai_ai_client = OpenAiClient() 9 | messages = [{ 10 | "role": "user", 11 | "content": python_swagger_prompt.format(route = route, function_definition = function_definition, context = context) 12 | }] 13 | response = openai_ai_client.call_chat_completion(messages=messages, temperature=1) 14 | start_index = response.find('{') 15 | end_index = response.rfind('}') 16 | swagger_json_block = response[start_index:end_index + 1] 17 | return json.loads(swagger_json_block) 18 | -------------------------------------------------------------------------------- /nodejs_pipeline/definition_swagger_generator.py: -------------------------------------------------------------------------------- 1 | import json 2 | from prompts import node_js_prompt 3 | from llm_client import OpenAiClient 4 | 5 | 6 | 7 | def get_function_definition_swagger(function_definition, context, route): 8 | openai_ai_client = OpenAiClient() 9 | content = node_js_prompt.format(route = route, function_definition = function_definition, context=context) 10 | messages = [{ 11 | "role": "user", 12 | 
"content": content 13 | }] 14 | response = openai_ai_client.call_chat_completion(messages=messages, temperature=1) 15 | start_index = response.find('{') 16 | end_index = response.rfind('}') 17 | swagger_json_block = response[start_index:end_index + 1] 18 | return json.loads(swagger_json_block) 19 | -------------------------------------------------------------------------------- /framework_identifier.py: -------------------------------------------------------------------------------- 1 | import json 2 | from config import Configurations 3 | from prompts import framework_identifier_prompt, framework_identifier_system_prompt 4 | from llm_client import OpenAiClient 5 | 6 | 7 | class FrameworkIdentifier: 8 | def __init__(self): 9 | self.config = Configurations() 10 | self.openai_client = OpenAiClient() 11 | 12 | 13 | def get_framework(self, file_paths): 14 | prompt = framework_identifier_prompt.format(file_paths = file_paths, frameworks = str(list(self.config.routing_patters_map.keys()))) 15 | messages = [ 16 | {"role": "system", "content": framework_identifier_system_prompt}, 17 | {"role": "user", "content": prompt} 18 | ] 19 | response_content = self.openai_client.call_chat_completion(messages=messages) 20 | start_index = response_content.find('{') 21 | end_index = response_content.rfind('}') 22 | swagger_json_block = response_content[start_index:end_index + 1] 23 | return json.loads(swagger_json_block) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 qodex-ai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | 4 | 5 | class Configurations: 6 | def __init__(self): 7 | # Get config path from environment variable 8 | config_path = os.environ.get("APIMESH_CONFIG_PATH") 9 | if config_path is None: 10 | raise ValueError( 11 | "APIMESH_CONFIG_PATH environment variable is not set. " 12 | "Please set it to the path of your config.yml file." 
13 | ) 14 | 15 | # Load YAML configurations 16 | self.config = self._load_config(config_path) 17 | 18 | # Assign values from the YAML file 19 | self.ignored_dirs = set(self.config.get("ignored_dirs", [])) 20 | self.routing_patters_map = self.config.get("routing_patterns_map", {}) 21 | self.gpt_4o_model_name = self.config.get("gpt_4o_model_name", "gpt-4o") 22 | 23 | def _load_config(self, config_path): 24 | """Loads configuration from a YAML file.""" 25 | with open(config_path, "r", encoding="utf-8") as file: 26 | config = yaml.safe_load(file) 27 | return config if config is not None else {} 28 | 29 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.10-slim 2 | 3 | # Set working directory 4 | WORKDIR /app 5 | 6 | # Install system dependencies 7 | RUN apt-get update && apt-get install -y \ 8 | git \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | # Copy requirements file 12 | COPY requirements.txt . 13 | 14 | # Install Python dependencies 15 | RUN pip install --no-cache-dir -r requirements.txt 16 | 17 | # Copy the entire application 18 | COPY . . 19 | 20 | # Create a directory for mounted repos (users will mount their repo here) 21 | RUN mkdir -p /workspace 22 | 23 | # Create entrypoint script 24 | COPY docker-entrypoint.sh /usr/local/bin/ 25 | RUN chmod +x /usr/local/bin/docker-entrypoint.sh 26 | 27 | # Set environment variables for config file paths 28 | ENV APIMESH_CONFIG_PATH=/app/config.yml 29 | ENV APIMESH_USER_CONFIG_PATH=/workspace/apimesh/config.json 30 | ENV APIMESH_USER_REPO_PATH=/workspace 31 | ENV APIMESH_OUTPUT_FILEPATH=/workspace/apimesh/swagger.json 32 | 33 | # Set the entrypoint 34 | ENTRYPOINT ["docker-entrypoint.sh"] 35 | 36 | # Default command - run interactively if no arguments provided 37 | # Users can override by passing arguments: docker run ... qodexai/apimesh --help 38 | CMD [] 39 | 40 | -------------------------------------------------------------------------------- /golang_pipeline/find_api_definition_files.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | 4 | from config import Configurations 5 | 6 | config = Configurations() 7 | 8 | 9 | def _is_ignored(path: Path) -> bool: 10 | return any(part in config.ignored_dirs for part in path.parts) 11 | 12 | 13 | def _is_test_file(path: Path) -> bool: 14 | return path.name.endswith("_test.go") 15 | 16 | 17 | def _looks_like_routing_file(path: Path) -> bool: 18 | """ 19 | Heuristic to bubble up files that are likely to contain router definitions. 
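    For example, "internal/http/router.go" matches the "router" and "http"
    tokens below, so it is ranked ahead of a file like "pkg/models/user.go".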
20 | """ 21 | lowered = path.as_posix().lower() 22 | candidates = ( 23 | "route", 24 | "router", 25 | "handler", 26 | "controller", 27 | "server", 28 | "api", 29 | "http", 30 | ) 31 | return any(token in lowered for token in candidates) 32 | 33 | 34 | def find_go_files(directory: str) -> List[Path]: 35 | base_path = Path(directory) 36 | go_files: List[Path] = [] 37 | for file_path in base_path.rglob("*.go"): 38 | if _is_ignored(file_path) or _is_test_file(file_path): 39 | continue 40 | go_files.append(file_path) 41 | return go_files 42 | 43 | 44 | def find_api_definition_files(directory: str) -> List[str]: 45 | go_files = find_go_files(directory) 46 | go_files.sort(key=lambda p: (0 if _looks_like_routing_file(p) else 1, str(p))) 47 | return [str(path) for path in go_files] 48 | 49 | -------------------------------------------------------------------------------- /rails_pipeline/find_api_definition_files.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | from typing import List 3 | 4 | from config import Configurations 5 | 6 | config = Configurations() 7 | 8 | 9 | def _is_ignored(path: Path) -> bool: 10 | return any(part in config.ignored_dirs for part in path.parts) 11 | 12 | 13 | def _looks_like_controller(path: Path) -> bool: 14 | if "app" not in path.parts: 15 | return False 16 | if "controllers" not in path.parts: 17 | return False 18 | return path.name.endswith("_controller.rb") 19 | 20 | 21 | def _looks_like_route_file(path: Path) -> bool: 22 | return path.as_posix().endswith("config/routes.rb") 23 | 24 | 25 | def find_ruby_files(directory: str) -> List[Path]: 26 | directory_path = Path(directory) 27 | ruby_files: List[Path] = [] 28 | for file_path in directory_path.rglob("*.rb"): 29 | if not _is_ignored(file_path): 30 | ruby_files.append(file_path) 31 | return ruby_files 32 | 33 | 34 | def find_api_definition_files(directory: str) -> List[str]: 35 | ruby_files = find_ruby_files(directory) 36 | api_files: List[str] = [] 37 | 38 | for ruby_file in ruby_files: 39 | if _looks_like_route_file(ruby_file): 40 | api_files.append(str(ruby_file)) 41 | continue 42 | if _looks_like_controller(ruby_file): 43 | api_files.append(str(ruby_file)) 44 | 45 | api_files.sort( 46 | key=lambda path: 0 if path.endswith("config/routes.rb") else 1 47 | ) 48 | return api_files 49 | -------------------------------------------------------------------------------- /.github/workflows/README.md: -------------------------------------------------------------------------------- 1 | # GitHub Actions Workflows 2 | 3 | ## Docker Build Workflow 4 | 5 | This workflow automatically builds and pushes Docker images to Docker Hub when tags are pushed to the repository. 6 | 7 | ### How it works: 8 | 9 | 1. **Trigger**: Automatically runs when you push a tag matching pattern `v*.*.*` (e.g., `v1.0.0`, `v2.1.3`) 10 | 2. **Build**: Builds the Docker image using the Dockerfile 11 | 3. **Tag**: Tags the image with: 12 | - The full tag name (e.g., `v1.0.0`) 13 | - `latest` (always updated to the newest tag) 14 | 4. **Push**: Pushes all tags to Docker Hub 15 | 16 | ### Setup Instructions: 17 | 18 | 1. **Create a Docker Hub token**: 19 | - Go to Docker Hub → Account Settings → Security 20 | - Click "New Access Token" 21 | - Give it a name (e.g., "github-actions") 22 | - Copy the token 23 | 24 | 2. 
**Add the token to GitHub Secrets**: 25 | - Go to your GitHub repository → Settings → Secrets and variables → Actions 26 | - Click "New repository secret" 27 | - Name: `DOCKER_HUB_TOKEN` 28 | - Value: Paste your Docker Hub token 29 | - Click "Add secret" 30 | 31 | 3. **Create and push a tag**: 32 | ```bash 33 | git tag v1.0.0 34 | git push origin v1.0.0 35 | ``` 36 | 37 | ### Version Tagging: 38 | 39 | - Tag format: `v1.0.0`, `v2.1.3`, etc. 40 | - Images will be tagged as: 41 | - `qodexai/apimesh:v1.0.0` (full tag) 42 | - `qodexai/apimesh:latest` (always points to newest) 43 | 44 | ### Manual Trigger: 45 | 46 | You can also manually trigger the workflow from the Actions tab in GitHub. 47 | 48 | -------------------------------------------------------------------------------- /.github/workflows/docker-build.yml: -------------------------------------------------------------------------------- 1 | name: Build and Push Docker Image 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*.*.*' # Triggers on tags like v1.0.0, v2.1.3, etc. 7 | workflow_dispatch: # Allows manual triggering 8 | 9 | env: 10 | DOCKER_HUB_USERNAME: qodexai 11 | IMAGE_NAME: apimesh 12 | 13 | jobs: 14 | build-and-push: 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v4 20 | 21 | - name: Set up Docker Buildx 22 | uses: docker/setup-buildx-action@v3 23 | 24 | - name: Extract version from tag 25 | id: tag_version 26 | run: | 27 | if [[ "${{ github.ref }}" == refs/tags/* ]]; then 28 | echo "tag=${{ github.ref_name }}" >> $GITHUB_OUTPUT 29 | else 30 | # For manual dispatch, use a default version 31 | echo "tag=dev" >> $GITHUB_OUTPUT 32 | fi 33 | 34 | - name: Log in to Docker Hub 35 | uses: docker/login-action@v3 36 | with: 37 | username: ${{ env.DOCKER_HUB_USERNAME }} 38 | password: ${{ secrets.DOCKER_HUB_TOKEN }} 39 | 40 | - name: Build and push Docker image 41 | uses: docker/build-push-action@v5 42 | with: 43 | context: . 44 | push: true 45 | pull: true 46 | tags: | 47 | ${{ env.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_NAME }}:${{ steps.tag_version.outputs.tag }} 48 | ${{ env.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_NAME }}:latest 49 | cache-from: type=gha 50 | cache-to: type=gha,mode=max 51 | platforms: linux/amd64,linux/arm64 52 | 53 | - name: Image digest 54 | run: | 55 | echo "Image pushed with tags:" 56 | echo " - ${{ steps.tag_version.outputs.tag }}" 57 | echo " - latest" 58 | 59 | -------------------------------------------------------------------------------- /llm_client.py: -------------------------------------------------------------------------------- 1 | from openai import OpenAI 2 | from langchain_openai import OpenAIEmbeddings 3 | from config import Configurations 4 | import json, os 5 | 6 | config = Configurations() 7 | 8 | class OpenAiClient: 9 | def __init__(self): 10 | self.openai_api_key = self.load_openai_api_key() 11 | self.client = OpenAI( 12 | api_key=self.openai_api_key) 13 | self.embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=self.openai_api_key) 14 | 15 | def call_chat_completion(self, messages, temperature=0.5): 16 | model = self.load_openai_model() 17 | # The Responses API is required for Codex models (chat.completions is unsupported). 
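        # gpt-5* models only accept the default temperature of 1, so any
        # caller-supplied value is overridden for them.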
18 | effective_temperature = 1 if model.startswith("gpt-5") else temperature 19 | response = self.client.responses.create( 20 | model=model, 21 | input=messages, 22 | temperature=effective_temperature, 23 | ) 24 | return response.output_text 25 | 26 | @staticmethod 27 | def load_openai_api_key(): 28 | config_file = os.environ.get("APIMESH_USER_CONFIG_PATH") 29 | if config_file is None: 30 | raise ValueError( 31 | "APIMESH_USER_CONFIG_PATH environment variable is not set. " 32 | "Please set it to the path of your config.json file." 33 | ) 34 | with open(config_file, "r") as file: 35 | user_config_data = json.load(file) 36 | return user_config_data['openai_api_key'] 37 | 38 | def load_openai_model(self): 39 | config_file = os.environ.get("APIMESH_USER_CONFIG_PATH") 40 | if config_file is None: 41 | raise ValueError( 42 | "APIMESH_USER_CONFIG_PATH environment variable is not set. " 43 | "Please set it to the path of your config.json file." 44 | ) 45 | with open(config_file, "r") as file: 46 | user_config_data = json.load(file) 47 | return user_config_data['openai_model'] 48 | -------------------------------------------------------------------------------- /nodejs_pipeline/find_api_definition_files.py: -------------------------------------------------------------------------------- 1 | import re 2 | from pathlib import Path 3 | from config import Configurations 4 | from nodejs_pipeline.constants import SUPPORTED_NODE_FILE_EXTENSIONS 5 | 6 | config = Configurations() 7 | 8 | API_DECORATOR_NAMES = { 9 | 'route', 'get', 'post', 'put', 'delete', 'patch', 'options', 'head', 'all', 10 | 'api', 'endpoint', 'router', 'controller', 'module', 'middleware', 'rest' 11 | } 12 | 13 | HTTP_METHODS = ['get', 'post', 'put', 'delete', 'patch', 'options', 'head'] 14 | ROUTE_OBJECT_PREFIXES = ['app', 'router', 'route', 'api', 'controller', 'server'] 15 | ROUTE_OBJECT_SUFFIXES = ['Router', 'Routes', 'Api', 'Controller', 'App', 'Server'] 16 | 17 | route_prefix_pattern = r'(?:' + '|'.join(ROUTE_OBJECT_PREFIXES) + r')' 18 | route_suffix_pattern = r'(?:[A-Za-z_$][\w$]*?(?:' + '|'.join(ROUTE_OBJECT_SUFFIXES) + r'))' 19 | route_object_pattern = r'(?:' + route_prefix_pattern + r'|' + route_suffix_pattern + r')' 20 | 21 | # Regex patterns to detect API routes or decorators 22 | ROUTE_METHOD_PATTERN = re.compile( 23 | r'\b' + route_object_pattern + r'\s*\.\s*(?:' + '|'.join(HTTP_METHODS) + r')\s*\(', 24 | re.IGNORECASE 25 | ) 26 | 27 | DECORATOR_PATTERN = re.compile( 28 | r'@\s*(' + '|'.join(API_DECORATOR_NAMES) + r')\b', 29 | re.IGNORECASE 30 | ) 31 | 32 | def find_node_files(directory): 33 | directory = Path(directory) 34 | node_files = [] 35 | for file in directory.rglob('*'): 36 | if file.suffix and file.suffix.lower() in SUPPORTED_NODE_FILE_EXTENSIONS: 37 | if not any(part in config.ignored_dirs for part in file.parts): 38 | node_files.append(file) 39 | return node_files 40 | 41 | def file_contains_api_defs(file_path): 42 | try: 43 | text = file_path.read_text(encoding='utf-8') 44 | except Exception: 45 | return False 46 | 47 | if ROUTE_METHOD_PATTERN.search(text): 48 | return True 49 | 50 | if DECORATOR_PATTERN.search(text): 51 | return True 52 | 53 | return False 54 | 55 | def find_api_definition_files(directory): 56 | node_files = find_node_files(directory) 57 | api_files = [] 58 | for node_file in node_files: 59 | if file_contains_api_defs(node_file): 60 | api_files.append(str(node_file)) 61 | return api_files 62 | -------------------------------------------------------------------------------- /file_scanner.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from typing import List 3 | from config import Configurations 4 | from utils import get_repo_path 5 | import re 6 | 7 | config = Configurations() 8 | class FileScanner: 9 | 10 | def __init__(self): 11 | pass 12 | 13 | def get_all_file_paths(self) -> List[str|bytes]: 14 | """ 15 | Get all file paths in the repository, ignoring specified directories 16 | """ 17 | repo_path = get_repo_path() 18 | file_paths = [] 19 | supported_extensions = ('.py', '.js', '.ts', '.java', '.rb', '.go') 20 | 21 | for root, dirs, files in os.walk(repo_path): 22 | dirs[:] = [d for d in dirs if d not in config.ignored_dirs] 23 | 24 | if not self.should_process_directory(root): 25 | continue 26 | 27 | for file in files: 28 | if file.endswith(supported_extensions): 29 | file_path = os.path.join(root, file) 30 | file_paths.append(file_path) 31 | return file_paths 32 | 33 | @staticmethod 34 | def find_api_files(file_paths, framework): 35 | patterns = config.routing_patters_map.get(framework) 36 | if not patterns: 37 | print(f"Warning: No routing patterns configured for framework '{framework or 'unknown'}'. Scanning all supported files.") 38 | return list(file_paths) 39 | api_files = [] 40 | for file_path in file_paths: 41 | try: 42 | with open(file_path, 'r', encoding='utf-8') as file: 43 | content = file.read() 44 | if any(re.search(pattern, content) for pattern in patterns): 45 | if framework == "ruby_on_rails": 46 | if file_path.endswith('.rb'): 47 | api_files.append(file_path) 48 | else: 49 | api_files.append(file_path) 50 | except (UnicodeDecodeError, FileNotFoundError): 51 | continue 52 | return api_files 53 | 54 | @staticmethod 55 | def should_process_directory(dir_path: str) -> bool: 56 | """ 57 | Check if a directory should be processed or ignored 58 | """ 59 | path_parts = dir_path.split(os.sep) 60 | return not any(part in config.ignored_dirs for part in path_parts) 61 | -------------------------------------------------------------------------------- /golang_pipeline/definition_swagger_generator.py: -------------------------------------------------------------------------------- 1 | import json 2 | from typing import List, Optional 3 | 4 | from llm_client import OpenAiClient 5 | from prompts import ( 6 | golang_swagger_generation_prompt, 7 | swagger_generation_system_prompt, 8 | ) 9 | 10 | 11 | def _extract_json_block(raw_text: str) -> Optional[str]: 12 | if not raw_text: 13 | return None 14 | start = raw_text.find("{") 15 | end = raw_text.rfind("}") 16 | if start == -1 or end == -1 or end <= start: 17 | return None 18 | return raw_text[start : end + 1] 19 | 20 | 21 | def _cleanup_swagger_payload(payload: dict) -> dict: 22 | paths = payload.get("paths", {}) 23 | for path_data in paths.values(): 24 | for method_data in path_data.values(): 25 | auth_tag = method_data.get("auth_tag") 26 | if auth_tag is None or str(auth_tag).strip() == "": 27 | method_data.pop("auth_tag", None) 28 | return payload 29 | 30 | 31 | def get_function_definition_swagger( 32 | function_definition: List[str], 33 | context: List[List[str]], 34 | route: str, 35 | http_method: Optional[str] = None, 36 | ) -> dict: 37 | client = OpenAiClient() 38 | function_text = "".join(function_definition) 39 | context_text = "\n\n".join("".join(block) for block in context) if context else "" 40 | 41 | prompt = golang_swagger_generation_prompt.format( 42 | endpoint_method=http_method or "GET", 43 | endpoint_path=route, 44 | 
endpoint_method_lower=(http_method or "GET").lower(), 45 | endpoint_info=function_text, 46 | authentication_information=context_text, 47 | ) 48 | 49 | messages = [ 50 | {"role": "system", "content": swagger_generation_system_prompt}, 51 | {"role": "user", "content": prompt}, 52 | ] 53 | 54 | last_error: Optional[Exception] = None 55 | for _ in range(3): 56 | response = client.call_chat_completion(messages=messages, temperature=0) 57 | payload = _extract_json_block(response) 58 | if not payload: 59 | last_error = ValueError("LLM response was missing JSON payload.") 60 | continue 61 | try: 62 | return _cleanup_swagger_payload(json.loads(payload)) 63 | except json.JSONDecodeError as exc: 64 | last_error = exc 65 | raise ValueError("Unable to parse Swagger JSON response.") from last_error 66 | -------------------------------------------------------------------------------- /python_pipeline/find_api_definition_files.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import ast 3 | from config import Configurations 4 | 5 | config = Configurations() 6 | 7 | API_DECORATOR_NAMES = { 8 | 'route', 'get', 'post', 'put', 'delete', 'patch', 9 | 'api', 'endpoint', 'router', 'viewset', 'view' 10 | } 11 | def find_python_files(directory): 12 | directory = Path(directory) 13 | python_files = [] 14 | for py_file in directory.rglob('*.py'): 15 | # Check if any parent directory is in IGNORE_DIRS 16 | if not any(part in config.ignored_dirs for part in py_file.parts): 17 | python_files.append(py_file) 18 | return python_files 19 | 20 | def has_api_decorator(decorator_node): 21 | if isinstance(decorator_node, ast.Call) and hasattr(decorator_node.func, 'attr'): 22 | if decorator_node.func.attr.lower() in API_DECORATOR_NAMES: 23 | return True 24 | if isinstance(decorator_node, ast.Attribute): 25 | if decorator_node.attr.lower() in API_DECORATOR_NAMES: 26 | return True 27 | if isinstance(decorator_node, ast.Name): 28 | if decorator_node.id.lower() in API_DECORATOR_NAMES: 29 | return True 30 | return False 31 | 32 | def file_contains_api_defs(file_path): 33 | try: 34 | source = file_path.read_text(encoding='utf-8') 35 | tree = ast.parse(source, filename=str(file_path)) 36 | except Exception: 37 | return False 38 | for node in ast.walk(tree): 39 | if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)): 40 | for decorator in node.decorator_list: 41 | if has_api_decorator(decorator): 42 | return True 43 | if isinstance(node, ast.ClassDef): 44 | for decorator in node.decorator_list: 45 | if has_api_decorator(decorator): 46 | return True 47 | for base in node.bases: 48 | if isinstance(base, ast.Name) and base.id.lower() in API_DECORATOR_NAMES: 49 | return True 50 | if isinstance(base, ast.Attribute) and base.attr.lower() in API_DECORATOR_NAMES: 51 | return True 52 | return False 53 | 54 | def find_api_definition_files(directory): 55 | py_files = find_python_files(directory) 56 | api_files = [] 57 | for py_file in py_files: 58 | if file_contains_api_defs(py_file): 59 | api_files.append(str(py_file)) 60 | return api_files 61 | 62 | # directory = Path('/Users/ankits/PycharmProjects/data-science-model-serving') 63 | # api_files = find_api_definition_files(directory) 64 | # print(api_files) 65 | -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | # Default values 5 | 
PROJECT_API_KEY="${PROJECT_API_KEY:-null}"
6 | OPENAI_API_KEY="${OPENAI_API_KEY:-null}"
7 | AI_CHAT_ID="${AI_CHAT_ID:-null}"
8 | 
9 | # Parse command line arguments
10 | while [[ $# -gt 0 ]]; do
11 |   case "$1" in
12 |     --project-api-key)
13 |       PROJECT_API_KEY="$2"
14 |       shift 2
15 |       ;;
16 |     --openai-api-key)
17 |       OPENAI_API_KEY="$2"
18 |       shift 2
19 |       ;;
20 |     --ai-chat-id)
21 |       AI_CHAT_ID="$2"
22 |       shift 2
23 |       ;;
24 |     --help)
25 |       echo "Swagger Generator Docker Image"
26 |       echo ""
27 |       echo "Usage (run from your repository directory):"
28 |       echo ""
29 |       echo "  # Interactive mode - prompts for missing inputs:"
30 |       echo "  cd /path/to/your/repo"
31 |       echo "  docker run --pull always -it --rm -v \$(pwd):/workspace qodexai/apimesh"
32 |       echo ""
33 |       echo "  # With environment variables:"
34 |       echo "  cd /path/to/your/repo"
35 |       echo "  docker run --pull always --rm -v \$(pwd):/workspace \\"
36 |       echo "    -e OPENAI_API_KEY=your_key \\"
37 |       echo "    -e PROJECT_API_KEY=your_key \\"
38 |       echo "    -e AI_CHAT_ID=your_chat_id \\"
39 |       echo "    qodexai/apimesh"
40 |       echo ""
41 |       echo "  # With command-line arguments:"
42 |       echo "  cd /path/to/your/repo"
43 |       echo "  docker run --pull always --rm -v \$(pwd):/workspace \\"
44 |       echo "    qodexai/apimesh \\"
45 |       echo "    --openai-api-key your_key"
46 |       echo ""
47 |       echo "Environment Variables (all optional - will prompt if not provided):"
48 |       echo "  OPENAI_API_KEY   - Your OpenAI API key"
49 |       echo "  PROJECT_API_KEY  - Your project API key"
50 |       echo "  AI_CHAT_ID       - Target AI chat ID"
51 |       echo ""
52 |       echo "Arguments (all optional - will prompt if not provided):"
53 |       echo "  --project-api-key  - Override PROJECT_API_KEY env var"
54 |       echo "  --openai-api-key   - Override OPENAI_API_KEY env var"
55 |       echo "  --ai-chat-id       - Override AI_CHAT_ID env var"
56 |       echo ""
57 |       echo "Note: Always run docker commands from your repository directory. Use -it flags for interactive mode."
58 |       exit 0
59 |       ;;
60 |     *)
61 |       echo "Unknown option: $1"
62 |       echo "Use --help for usage information"
63 |       exit 1
64 |       ;;
65 |   esac
66 | done
67 | 
68 | # Normalize values - pass empty string if null so Python script can prompt
69 | if [ "$PROJECT_API_KEY" == "null" ] || [ -z "$PROJECT_API_KEY" ]; then
70 |   PROJECT_API_KEY=""
71 | fi
72 | 
73 | if [ "$OPENAI_API_KEY" == "null" ] || [ -z "$OPENAI_API_KEY" ]; then
74 |   OPENAI_API_KEY=""
75 | fi
76 | 
77 | if [ "$AI_CHAT_ID" == "null" ] || [ -z "$AI_CHAT_ID" ]; then
78 |   AI_CHAT_ID=""
79 | fi
80 | 
81 | # Run the swagger generation
82 | # The Python script will prompt for any missing values
83 | cd /app
84 | export PYTHONPATH=/app:$PYTHONPATH
85 | 
86 | python3 swagger_generation_cli.py "$OPENAI_API_KEY" "$PROJECT_API_KEY" "$AI_CHAT_ID"
--------------------------------------------------------------------------------
/rails_pipeline/definition_swagger_generator.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | from typing import List, Optional
4 | 
5 | from llm_client import OpenAiClient
6 | from prompts import ruby_on_rails_swagger_generation_prompt
7 | 
8 | 
9 | _SYSTEM_PROMPT = (
10 |     "You are a meticulous API documentation assistant. "
11 |     "Respond with a single valid JSON object that matches the requested schema. "
12 |     "Do not include any surrounding prose, markdown, or code fences."
13 | )
14 | 
15 | 
16 | def _extract_json_block(raw_text: str) -> Optional[str]:
17 |     """
18 |     Extract the JSON payload from a raw LLM response, handling code fences and
19 |     other wrapping text the model might emit.
20 | """ 21 | if not raw_text: 22 | return None 23 | 24 | fence_match = re.search( 25 | r"```(?:json)?\s*(\{[\s\S]*\})\s*```", raw_text, flags=re.IGNORECASE 26 | ) 27 | if fence_match: 28 | return fence_match.group(1).strip() 29 | 30 | start_index = raw_text.find("{") 31 | end_index = raw_text.rfind("}") 32 | if start_index == -1 or end_index == -1 or end_index <= start_index: 33 | return None 34 | return raw_text[start_index : end_index + 1].strip() 35 | 36 | 37 | def get_function_definition_swagger( 38 | function_definition: List[str], 39 | context: List[List[str]], 40 | route: str, 41 | http_method: Optional[str] = None, 42 | ) -> dict: 43 | """ 44 | Delegate the heavy lifting of producing a Swagger snippet for a single 45 | Rails endpoint to the LLM, mirroring the behaviour of the Node and Python 46 | generators. 47 | """ 48 | openai_ai_client = OpenAiClient() 49 | function_definition_text = "".join(function_definition) 50 | context_text = "\n\n".join("".join(block) for block in context) if context else "" 51 | endpoint_info_text = ( 52 | f"{function_definition_text}\n\n{context_text}" 53 | if context_text 54 | else function_definition_text 55 | ) 56 | 57 | prompt = ruby_on_rails_swagger_generation_prompt.format( 58 | endpoint_info=endpoint_info_text, 59 | endpoint_method=http_method or "GET", 60 | endpoint_path=route, 61 | authentication_information=context_text, 62 | ) 63 | 64 | messages = [ 65 | {"role": "system", "content": _SYSTEM_PROMPT}, 66 | {"role": "user", "content": prompt}, 67 | ] 68 | 69 | last_error: Optional[Exception] = None 70 | for _ in range(3): 71 | response = openai_ai_client.call_chat_completion( 72 | messages=messages, temperature=0 73 | ) 74 | swagger_json_block = _extract_json_block(response) 75 | if not swagger_json_block: 76 | last_error = ValueError("LLM response did not contain JSON payload.") 77 | continue 78 | try: 79 | return json.loads(swagger_json_block) 80 | except json.JSONDecodeError as exc: 81 | last_error = exc 82 | continue 83 | 84 | error_message = ( 85 | "Failed to parse Swagger JSON from LLM response after multiple attempts." 
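        # last_error, captured in the retry loop above, is chained onto the raise below.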
86 |     )
87 |     if last_error:
88 |         raise ValueError(error_message) from last_error
89 |     raise ValueError(error_message)
--------------------------------------------------------------------------------
/faiss_index_generator.py:
--------------------------------------------------------------------------------
1 | from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
2 | from langchain.vectorstores import FAISS
3 | from llm_client import OpenAiClient
4 | from utils import num_tokens_from_string
5 | 
6 | 
7 | class GenerateFaissIndex:
8 |     def __init__(self):
9 |         self.openai_client = OpenAiClient()
10 | 
11 |     def create_faiss_index(self, file_paths, framework):
12 |         if framework == "ruby_on_rails":
13 |             text_splitter = RecursiveCharacterTextSplitter.from_language(
14 |                 chunk_size=2000,
15 |                 chunk_overlap=200, language=Language.RUBY
16 |             )
17 |         elif framework == "express":
18 |             text_splitter = RecursiveCharacterTextSplitter.from_language(
19 |                 chunk_size=2000,
20 |                 chunk_overlap=200, language=Language.JS
21 |             )
22 |         elif framework == "django" or framework == "flask" or framework == "fastapi":
23 |             text_splitter = RecursiveCharacterTextSplitter.from_language(
24 |                 chunk_size=2000,
25 |                 chunk_overlap=200, language=Language.PYTHON
26 |             )
27 |         elif framework == "golang":
28 |             text_splitter = RecursiveCharacterTextSplitter.from_language(
29 |                 chunk_size=2000,
30 |                 chunk_overlap=200, language=Language.GO
31 |             )
32 |         else:
33 |             text_splitter = RecursiveCharacterTextSplitter(
34 |                 chunk_size=2000,
35 |                 chunk_overlap=200
36 |             )
37 |         texts = []
38 |         metadata = []
39 | 
40 |         for file_path in file_paths:
41 |             # Use distinct names so the metadata records the path, not the closed file handle.
42 |             with open(file_path, 'r', encoding='utf-8') as source_file:
43 |                 file_content = source_file.read()
44 |             chunks = text_splitter.split_text(file_content)
45 |             texts.extend(chunks)
46 |             metadata.extend([{'file_path': str(file_path)}] * len(chunks))
47 |         all_indices = []
48 |         batch = []
49 |         batch_meta = []
50 |         batch_token_count = 0
51 | 
52 |         for text, meta in zip(texts, metadata):
53 |             tokens = num_tokens_from_string(text)
54 | 
55 |             # Start new batch if adding this text exceeds token limit
56 |             if batch_token_count + tokens > 290000:
57 |                 index = FAISS.from_texts(batch, self.openai_client.embeddings, metadatas=batch_meta)
58 |                 all_indices.append(index)
59 |                 batch, batch_meta, batch_token_count = [], [], 0
60 | 
61 |             batch.append(text)
62 |             batch_meta.append(meta)
63 |             batch_token_count += tokens
64 | 
65 |         # Final batch
66 |         if batch:
67 |             index = FAISS.from_texts(batch, self.openai_client.embeddings, metadatas=batch_meta)
68 |             all_indices.append(index)
69 | 
70 |         # Merge all indices and return the combined index (re-embedding here would defeat the batching above)
71 |         final_index = all_indices[0]
72 |         for idx in all_indices[1:]:
73 |             final_index.merge_from(idx)
74 |         return final_index
75 | 
76 |     @staticmethod
77 |     def get_authentication_related_information(faiss_vector_db):
78 |         query = "function to handle authentication information and authorization information"
79 |         docs = faiss_vector_db.similarity_search(str(query), k=4)
80 |         content_list = [doc.page_content.strip() for doc in docs]
81 |         return content_list
--------------------------------------------------------------------------------
/user_config.py:
--------------------------------------------------------------------------------
1 | """
2 | Utility helpers for capturing and persisting user-specific configuration
3 | for the Swagger Generator CLI.
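
Resolved values are persisted to the JSON file named by the
APIMESH_USER_CONFIG_PATH environment variable. An illustrative (not
normative) shape of that file, with a placeholder key:

    {
        "openai_api_key": "sk-...",
        "openai_model": "gpt-5.1-codex",
        "api_host": "https://api.example.com"
    }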
4 | """ 5 | 6 | import os, json 7 | from config import Configurations 8 | from utils import get_repo_name, get_repo_path 9 | configurations = Configurations() 10 | 11 | # Get JSON config file path from environment variable 12 | config_file = os.environ.get("APIMESH_USER_CONFIG_PATH") 13 | if config_file is None: 14 | raise ValueError( 15 | "APIMESH_USER_CONFIG_PATH environment variable is not set. " 16 | "Please set it to the path of your config.json file." 17 | ) 18 | 19 | # Ensure the directory exists 20 | config_dir = os.path.dirname(config_file) 21 | os.makedirs(config_dir, exist_ok=True) 22 | 23 | class UserConfigurations: 24 | def __init__(self, project_api_key, openai_api_key, ai_chat_id, is_mcp): 25 | self.is_mcp = is_mcp 26 | self.ai_chat_id = ai_chat_id 27 | self.add_user_configs(project_api_key, openai_api_key) 28 | 29 | @staticmethod 30 | def load_user_config(): 31 | if os.path.exists(config_file): 32 | with open(config_file, "r") as file: 33 | return json.load(file) 34 | return {} 35 | 36 | @staticmethod 37 | def save_user_config(config): 38 | with open(config_file, "w") as file: 39 | json.dump(config, file, indent=4) 40 | 41 | @staticmethod 42 | def _sanitize_cli_value(value): 43 | if value is None: 44 | return "" 45 | if isinstance(value, str): 46 | cleaned_value = value.strip() 47 | else: 48 | cleaned_value = str(value).strip() 49 | return cleaned_value if cleaned_value and cleaned_value.lower() != "null" else "" 50 | 51 | @staticmethod 52 | def _print_section_header(title): 53 | line = "=" * max(len(title) + 10, 50) 54 | print(f"\n{line}\n{title}\n{line}") 55 | 56 | def add_user_configs(self, project_api_key, openai_api_key): 57 | user_config = self.load_user_config() 58 | self._print_section_header("OpenAI Credentials") 59 | stored_openai_api_key = user_config.get("openai_api_key", "") 60 | sanitized_openai_api_key = self._sanitize_cli_value(openai_api_key) 61 | if sanitized_openai_api_key: 62 | resolved_openai_api_key = sanitized_openai_api_key 63 | elif not stored_openai_api_key and not self.is_mcp: 64 | resolved_openai_api_key = input( 65 | f"Please enter openai api key (default: {stored_openai_api_key}): ") or stored_openai_api_key 66 | else: 67 | resolved_openai_api_key = stored_openai_api_key 68 | user_config["openai_api_key"] = resolved_openai_api_key 69 | self.save_user_config(user_config) 70 | print(f" ✓ API Key: {resolved_openai_api_key}") 71 | 72 | self._print_section_header("Model Selection") 73 | default_openai_model = user_config.get("openai_model", "gpt-5.1-codex") 74 | openai_model = default_openai_model 75 | user_config["openai_model"] = openai_model 76 | self.save_user_config(user_config) 77 | print(f" ✓ AI Model: {openai_model}") 78 | 79 | self._print_section_header("API Host Configuration") 80 | default_api_host = user_config.get("api_host", "https://api.example.com") 81 | api_host = default_api_host 82 | user_config["api_host"] = api_host 83 | self.save_user_config(user_config) 84 | print(f" ✓ API Host: {api_host}") 85 | # Check if the user entered something 86 | if not api_host.strip(): 87 | print(" ✗ No api host provided. 
Exiting...") 88 | exit(1) 89 | -------------------------------------------------------------------------------- /swagger_mcp.py: -------------------------------------------------------------------------------- 1 | from mcp.server.fastmcp import FastMCP 2 | from typing import Optional 3 | import os, subprocess, shutil, sys 4 | 5 | APP_NAME = "SwaggerGenerator MCP" 6 | DEFAULT_WORK_DIR = os.path.dirname(os.path.abspath(__file__)) 7 | DEFAULT_SCRIPT_URL = "https://raw.githubusercontent.com/qodex-ai/apimesh/main/bootstrap_mcp_runner.sh" 8 | 9 | mcp = FastMCP(APP_NAME) 10 | 11 | def _require(name: str, val: Optional[str]): 12 | if not val or str(val).strip().lower() == "null": 13 | raise ValueError(f"Missing required parameter: {name}") 14 | 15 | def _need(cmd: str): 16 | if shutil.which(cmd) is None: 17 | raise RuntimeError(f"Missing dependency: {cmd} is not on PATH") 18 | 19 | def _ensure_dir(p: str): 20 | os.makedirs(p, exist_ok=True) 21 | 22 | @mcp.tool() 23 | def run_swagger_generation( 24 | openai_api_key: str, 25 | repo_path: str, 26 | timeout_seconds: int = 900 27 | ) -> dict: 28 | """ 29 | This tool takes the path of the repository, openai_api_key and timeout to generate a openapi spec swagger json for that repo. 30 | """ 31 | _require("openai_api_key", openai_api_key) 32 | _require("repo_path", repo_path) 33 | 34 | for dep in ("bash", "curl", "git", "python3", "pip3"): 35 | _need(dep) 36 | 37 | base_dir = DEFAULT_WORK_DIR 38 | _ensure_dir(base_dir) 39 | 40 | repo_path = os.path.abspath(os.path.expanduser(repo_path)) 41 | if not os.path.isdir(repo_path): 42 | raise ValueError(f"repo_path is not a directory: {repo_path}") 43 | 44 | # --- fetch script (be sure it's a STRING, not a tuple) --- 45 | script_url = DEFAULT_SCRIPT_URL # <-- no trailing comma 46 | script_path = os.path.join(base_dir, "bootstrap_mcp_runner.sh") # <-- no trailing comma 47 | 48 | # debug types to Claude's log 49 | print(f"[mcp] base_dir={base_dir!r} ({type(base_dir)})", file=sys.stderr) 50 | print(f"[mcp] repo_path={repo_path!r} ({type(repo_path)})", file=sys.stderr) 51 | print(f"[mcp] script_url={script_url!r} ({type(script_url)})", file=sys.stderr) 52 | print(f"[mcp] script_path={script_path!r} ({type(script_path)})", file=sys.stderr) 53 | 54 | curl = subprocess.run( 55 | ["curl", "-sSL", script_url, "-o", script_path], 56 | capture_output=True, text=True 57 | ) 58 | if curl.returncode != 0: 59 | raise RuntimeError(f"curl failed ({curl.returncode}): {curl.stderr or curl.stdout}") 60 | 61 | chmod = subprocess.run(["chmod", "+x", script_path], capture_output=True, text=True) 62 | if chmod.returncode != 0: 63 | raise RuntimeError(f"chmod failed ({chmod.returncode}): {chmod.stderr or chmod.stdout}") 64 | 65 | # --- env for the script --- 66 | env = os.environ.copy() 67 | env.update({ 68 | "OPENAI_API_KEY": openai_api_key, 69 | "SWAGGER_BOT_REPO_PATH": repo_path, 70 | "WORK_DIR": base_dir, 71 | }) 72 | 73 | # --- command (ALL ARGS AS STRINGS) --- 74 | cmd = [ 75 | "bash", script_path, 76 | "--repo-path", repo_path, 77 | "--openai-api-key", openai_api_key, 78 | "--project-api-key", "null", 79 | "--ai-chat-id", "null", 80 | "--is-mcp", "true", 81 | ] 82 | print(f"[mcp] running: {cmd} (cwd={base_dir})", file=sys.stderr) 83 | 84 | proc = subprocess.run( 85 | cmd, 86 | cwd=base_dir, 87 | env=env, 88 | capture_output=True, 89 | text=True, 90 | timeout=timeout_seconds, 91 | ) 92 | os.remove(script_path) 93 | 94 | result = { 95 | "exit_code": proc.returncode, 96 | "work_dir": base_dir, 97 | "stdout": proc.stdout[-200_000:], 
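        # Keep only the final 200,000 characters of each stream so the MCP
        # response payload stays bounded on long runs.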
98 | "stderr": proc.stderr[-200_000:], 99 | } 100 | return result 101 | 102 | if __name__ == "__main__": 103 | print("[mcp] server booted; waiting on stdio", file=sys.stderr) 104 | mcp.run() 105 | -------------------------------------------------------------------------------- /bootstrap_mcp_runner.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | 4 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" 5 | 6 | REPO_URL="${REPO_URL:-https://github.com/qodex-ai/apimesh.git}" 7 | BRANCH_NAME="${BRANCH_NAME:-main}" 8 | REPO_DIR="" 9 | 10 | PROJECT_API_KEY="null" 11 | OPENAI_API_KEY="null" 12 | AI_CHAT_ID="null" 13 | REPO_PATH="$SCRIPT_DIR" 14 | APIMESH_DIR="" 15 | VENV_DIR="" 16 | CLONE_DIR="" 17 | 18 | need() { command -v "$1" >/dev/null 2>&1 || { echo "Missing dependency: $1" >&2; exit 2; }; } 19 | need bash; need git; need curl; need python3; need pip3 20 | 21 | while [[ $# -gt 0 ]]; do 22 | case "$1" in 23 | --project-api-key) PROJECT_API_KEY="${2:-null}"; shift 2 ;; 24 | --openai-api-key) OPENAI_API_KEY="${2:-null}"; shift 2 ;; 25 | --ai-chat-id) AI_CHAT_ID="${2:-null}"; shift 2 ;; 26 | --repo-path) REPO_PATH="${2:-$REPO_PATH}"; shift 2 ;; 27 | *) echo "Ignoring unknown arg: $1"; shift ;; 28 | esac 29 | done 30 | 31 | if [[ ! -d "$REPO_PATH" ]]; then 32 | echo "Provided --repo-path '$REPO_PATH' is not a directory" >&2 33 | exit 3 34 | fi 35 | 36 | REPO_PATH="$(cd "$REPO_PATH" && pwd)" 37 | APIMESH_DIR="$REPO_PATH/apimesh" 38 | VENV_DIR="$APIMESH_DIR/qodexai-virtual-env" 39 | CLONE_DIR="$APIMESH_DIR/apimesh" 40 | 41 | cleanup() { 42 | local exit_code=$? 43 | trap - EXIT 44 | cd "$SCRIPT_DIR" 45 | 46 | if [[ -n "${VIRTUAL_ENV:-}" ]]; then 47 | deactivate >/dev/null 2>&1 || true 48 | fi 49 | 50 | if [[ -d "$CLONE_DIR" ]]; then 51 | echo "Removing cloned repository at '$CLONE_DIR'" 52 | rm -rf "$CLONE_DIR" 53 | fi 54 | 55 | if [[ -d "$VENV_DIR" ]]; then 56 | echo "Removing virtual environment at '$VENV_DIR'" 57 | rm -rf "$VENV_DIR" 58 | fi 59 | 60 | exit "$exit_code" 61 | } 62 | 63 | trap cleanup EXIT 64 | 65 | mkdir -p "$APIMESH_DIR" 66 | 67 | if [[ -d "$VENV_DIR" ]]; then 68 | echo "Virtual environment already exists at '$VENV_DIR'. Removing it." 69 | rm -rf "$VENV_DIR" 70 | fi 71 | 72 | echo "Creating Python venv at $VENV_DIR" 73 | python3 -m venv "$VENV_DIR" 74 | source "$VENV_DIR/bin/activate" 75 | 76 | pip3 install --upgrade pip 77 | pip3 install \ 78 | "langchain==0.3.16" \ 79 | "langchain-community==0.3.16" \ 80 | "langchain-core==0.3.63" \ 81 | "langchain-openai==0.3.5" \ 82 | "langsmith==0.1.139" \ 83 | "openai==1.76.0" \ 84 | "numpy<2" \ 85 | "tiktoken==0.8.0" \ 86 | "faiss-cpu==1.9.0.post1" \ 87 | "langchain-text-splitters==0.3.4" \ 88 | "pyyaml==6.0.2" \ 89 | "tree-sitter==0.25.1" \ 90 | "tree-sitter-python==0.23.6" \ 91 | "tree-sitter-javascript==0.23.1" \ 92 | "tree-sitter-ruby==0.23.1" \ 93 | "tree-sitter-go==0.25.0" \ 94 | "tree-sitter-typescript==0.23.2" \ 95 | "esprima==4.0.1" \ 96 | "requests" 97 | 98 | # --- repo setup (clone/update specific branch) --- 99 | if [[ -d "$CLONE_DIR/.git" ]]; then 100 | echo "Repo exists, switching to branch '$BRANCH_NAME' and pulling latest..." 101 | git -C "$CLONE_DIR" fetch --prune origin 102 | git -C "$CLONE_DIR" checkout -B "$BRANCH_NAME" "origin/$BRANCH_NAME" 103 | git -C "$CLONE_DIR" pull --ff-only origin "$BRANCH_NAME" 104 | else 105 | echo "Cloning repo branch '$BRANCH_NAME'..." 
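  # A CLONE_DIR without a .git directory is a stale partial checkout; remove
  # it so the fresh clone below cannot fail on a non-empty destination.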
106 | if [[ -d "$CLONE_DIR" ]]; then 107 | rm -rf "$CLONE_DIR" 108 | fi 109 | git clone --branch "$BRANCH_NAME" --single-branch "$REPO_URL" "$CLONE_DIR" 110 | fi 111 | # --- end repo setup --- 112 | 113 | REPO_DIR="$(cd "$CLONE_DIR" && pwd)" 114 | 115 | export PYTHONPATH="$REPO_PATH:$REPO_DIR:${PYTHONPATH:-}" 116 | export APIMESH_CONFIG_PATH="$REPO_DIR/config.yml" 117 | export APIMESH_USER_CONFIG_PATH="$APIMESH_DIR/config.json" 118 | export APIMESH_USER_REPO_PATH="$REPO_PATH" 119 | export APIMESH_OUTPUT_FILEPATH="$APIMESH_DIR/swagger.json" 120 | 121 | 122 | cd "$REPO_DIR" 123 | python3 -m swagger_generation_cli "$OPENAI_API_KEY" "$PROJECT_API_KEY" "$AI_CHAT_ID" true 124 | 125 | exit 0 126 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | # config.yml 2 | ignored_dirs: 3 | - .git 4 | - node_modules 5 | - venv 6 | - __pycache__ 7 | - build 8 | - dist 9 | - tests 10 | - test 11 | - docs 12 | - examples 13 | - migrations 14 | - tmp 15 | - vendor 16 | - app/assets 17 | - develop-eggs 18 | - downloads 19 | - eggs 20 | - .eggs 21 | - lib64 22 | - parts 23 | - sdist 24 | - var 25 | - wheels 26 | - .egg-info 27 | - .env 28 | - .venv 29 | - ENV 30 | - .python-version 31 | - .pytest_cache 32 | - .coverage 33 | - htmlcov 34 | - log 35 | - db 36 | - public 37 | - coverage 38 | - spec 39 | - bundle 40 | - .rvmrc 41 | - .byebug_history 42 | - storage 43 | - .pnp 44 | - .env.local 45 | - .env.development.local 46 | - .env.test.local 47 | - .env.production.local 48 | - .next 49 | - .nuxt 50 | - .DS_Store 51 | - .tscache 52 | - .angular 53 | - dist-types 54 | - target 55 | - .apt_generated 56 | - .classpath 57 | - .factorypath 58 | - .project 59 | - .settings 60 | - .springBeans 61 | - .sts4-cache 62 | - .gradle 63 | - logs 64 | - .idea 65 | - .vscode 66 | - qodexai-virtual-env 67 | - apimesh 68 | 69 | routing_patterns_map: 70 | ruby_on_rails: 71 | - '\bresources\b.*:' 72 | - 'namespace\b.*''' 73 | - 'Rails\.application\.routes\.draw' 74 | - 'root\s+(?:''|")' 75 | - 'get\s+[''"]/\w+' 76 | - 'post\s+[''"]/\w+' 77 | - 'put\s+[''"]/\w+' 78 | - 'delete\s+[''"]/\w+' 79 | 80 | django: 81 | - 'path\([''"]' 82 | - 'include\([''"]' 83 | - 'url\([''"]' 84 | - 'urlpatterns\s*=' 85 | - '@route\([''"]' 86 | - '\.(?:get|post|put|delete)_api\(' 87 | - '@api_view\(\[[''""](?:GET|POST|PUT|DELETE)[''"]' 88 | - "ListAPIView" 89 | - "CreateAPIView" 90 | - "UpdateAPIView" 91 | - "DestroyAPIView" 92 | 93 | express: 94 | - 'app\.(?:get|post|put|delete)\([''"]' 95 | - 'router\.(?:get|post|put|delete)\([''"]' 96 | - 'express\.Router\(\)' 97 | - 'app\.use\([''"]' 98 | 99 | flask: 100 | - '@app\.route\([''"]' 101 | - 'app\.(?:get|post|put|delete)\([''"]' 102 | - '@blueprint\.route\([''"]' 103 | - 'flask\.Blueprint\(' 104 | - 'app\.register_blueprint\(' 105 | - '@\w+\.route\([''"]' 106 | - 'Api\(' 107 | - 'Resource\)' 108 | - 'def (?:get|post|put|delete)\(' 109 | 110 | fastapi: 111 | - '@app\.(?:get|post|put|delete)\([''"]' 112 | - '@router\.(?:get|post|put|delete)\([''"]' 113 | - 'APIRouter\(\)' 114 | - 'app\.include_router\(' 115 | - '@app\.middleware\([''"]' 116 | 117 | laravel: 118 | - 'Route::(?:get|post|put|delete)\([''"]' 119 | - 'Route::resource\([''"]' 120 | - 'Route::group\(' 121 | - '->middleware\([''"]' 122 | 123 | spring: 124 | - '@RequestMapping\([''"]' 125 | - '@GetMapping\([''"]' 126 | - '@PostMapping\([''"]' 127 | - '@PutMapping\([''"]' 128 | - '@DeleteMapping\([''"]' 129 | - 
"@RestController" 130 | - "@Controller" 131 | - "@RequestParam" 132 | - "@PathVariable" 133 | 134 | golang: 135 | # net/http (standard library) 136 | - 'http\.HandleFunc\([''"]' 137 | - 'http\.Handle\([''"]' 138 | - 'http\.ServeMux\{' 139 | 140 | # gorilla/mux 141 | - 'mux\.HandleFunc\([''"]' 142 | - 'mux\.Handle\([''"]' 143 | - 'mux\.NewRouter\(' 144 | 145 | # gin-gonic/gin 146 | - 'gin\.Default\(' 147 | - 'gin\.New\(' 148 | - '\b\w+\.(GET|POST|PUT|DELETE|PATCH|OPTIONS|HEAD)\([''"]' 149 | 150 | # echo 151 | - 'echo\.New\(' 152 | - '\b\w+\.(GET|POST|PUT|DELETE|PATCH|OPTIONS|HEAD)\([''"]' 153 | 154 | # fiber 155 | - 'fiber\.New\(' 156 | - '\b\w+\.(Get|Post|Put|Delete|Patch|Options|Head)\([''"]' 157 | 158 | nestjs: 159 | - '@Controller\\([''"]' 160 | - '@(Get|Post|Put|Delete|Patch|Options|Head)\\(' 161 | - 'NestFactory\\.create\\(' 162 | - 'app\\.select\\(' 163 | - 'app\\.useGlobalPipes\\(' 164 | - '@Module\\(' 165 | 166 | gpt_4o_model_name: "gpt-5.1-codex" 167 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | # Configuration 6 | APIMESH_DIR="apimesh" 7 | VENV_DIR="$APIMESH_DIR/qodexai-virtual-env" 8 | REPO_URL="https://github.com/qodex-ai/apimesh.git" 9 | CLONE_DIR="$APIMESH_DIR/apimesh" 10 | CURRENT_DIR="$(pwd)" 11 | 12 | # Default values for optional parameters 13 | OPENAI_API_KEY="" 14 | PROJECT_API_KEY="" 15 | AI_CHAT_ID="" 16 | 17 | # Parse command line arguments 18 | while [[ $# -gt 0 ]]; do 19 | case "$1" in 20 | --openai-api-key) 21 | OPENAI_API_KEY="$2" 22 | shift 2 23 | ;; 24 | --project-api-key) 25 | PROJECT_API_KEY="$2" 26 | shift 2 27 | ;; 28 | --ai-chat-id) 29 | AI_CHAT_ID="$2" 30 | shift 2 31 | ;; 32 | *) 33 | echo "Unknown option: $1" 34 | echo "Usage: $0 [--openai-api-key KEY] [--project-api-key KEY] [--ai-chat-id ID]" 35 | exit 1 36 | ;; 37 | esac 38 | done 39 | 40 | # Cleanup function 41 | cleanup() { 42 | local exit_code=$? 43 | trap - EXIT 44 | 45 | # Deactivate virtual environment if active 46 | if command -v deactivate >/dev/null 2>&1; then 47 | deactivate >/dev/null 2>&1 || true 48 | fi 49 | 50 | # Remove cloned repository 51 | if [[ -d "$CLONE_DIR" ]]; then 52 | echo "Removing cloned repository at '$CLONE_DIR'" 53 | rm -rf "$CLONE_DIR" 54 | fi 55 | 56 | # Remove virtual environment 57 | if [[ -d "$VENV_DIR" ]]; then 58 | echo "Removing virtual environment at '$VENV_DIR'" 59 | rm -rf "$VENV_DIR" 60 | fi 61 | 62 | # Remove this script itself (last step) 63 | if [[ -f "$0" ]]; then 64 | echo "Removing script '$0'" 65 | rm -f "$0" 66 | fi 67 | 68 | exit "$exit_code" 69 | } 70 | 71 | # Set trap for cleanup on exit 72 | trap cleanup EXIT 73 | 74 | # Step 1: Create apimesh folder 75 | echo "Creating apimesh folder..." 76 | mkdir -p "$APIMESH_DIR" 77 | echo "Created folder: $APIMESH_DIR" 78 | echo "" 79 | 80 | # Step 2: Create Python virtual environment 81 | echo "Creating Python virtual environment..." 82 | if [[ -d "$VENV_DIR" ]]; then 83 | echo "Virtual environment already exists at '$VENV_DIR'. Removing it..." 84 | rm -rf "$VENV_DIR" 85 | fi 86 | 87 | python3 -m venv "$VENV_DIR" 88 | echo "Virtual environment created at '$VENV_DIR'" 89 | echo "" 90 | 91 | # Step 3: Activate virtual environment and install dependencies 92 | echo "Activating virtual environment..." 93 | source "$VENV_DIR/bin/activate" 94 | echo "Virtual environment activated" 95 | echo "" 96 | 97 | echo "Installing Python dependencies..." 
98 | pip3 install --quiet --upgrade pip 99 | pip3 install \ 100 | "langchain==0.3.16" \ 101 | "langchain-community==0.3.16" \ 102 | "langchain-core==0.3.63" \ 103 | "langchain-openai==0.3.5" \ 104 | "langsmith==0.1.139" \ 105 | "openai==1.76.0" \ 106 | "tiktoken==0.8.0" \ 107 | "faiss-cpu==1.9.0.post1" \ 108 | "langchain-text-splitters==0.3.4" \ 109 | "pyyaml==6.0.2" \ 110 | "numpy<2" \ 111 | "tree-sitter==0.25.1" \ 112 | "tree-sitter-python==0.23.6" \ 113 | "tree-sitter-javascript==0.23.1" \ 114 | "tree-sitter-ruby==0.23.1" \ 115 | "tree-sitter-go==0.25.0" \ 116 | "tree-sitter-typescript==0.23.2" \ 117 | "esprima==4.0.1" \ 118 | "requests" 119 | echo "Dependencies installed" 120 | echo "" 121 | 122 | # Step 4: Clone the repository 123 | echo "Cloning repository from $REPO_URL..." 124 | if [[ -d "$CLONE_DIR" ]]; then 125 | echo "Repository already exists at '$CLONE_DIR'. Removing it..." 126 | rm -rf "$CLONE_DIR" 127 | fi 128 | 129 | git clone "$REPO_URL" "$CLONE_DIR" 130 | echo "Repository cloned to '$CLONE_DIR'" 131 | echo "" 132 | 133 | # Step 5: Run the swagger generation CLI 134 | echo "Running swagger generation CLI..." 135 | echo "REPO_PATH: $CURRENT_DIR" 136 | echo "OPENAI_API_KEY: ${OPENAI_API_KEY:+***}" 137 | echo "PROJECT_API_KEY: ${PROJECT_API_KEY:+***}" 138 | echo "AI_CHAT_ID: ${AI_CHAT_ID:+***}" 139 | echo "" 140 | 141 | # Add current directory and cloned directory to PYTHONPATH so Python can find modules 142 | export PYTHONPATH="$CURRENT_DIR:$CLONE_DIR:$PYTHONPATH" 143 | 144 | # Set config paths 145 | export APIMESH_CONFIG_PATH="$CLONE_DIR/config.yml" 146 | export APIMESH_USER_CONFIG_PATH="$CURRENT_DIR/apimesh/config.json" 147 | export APIMESH_USER_REPO_PATH="$CURRENT_DIR" 148 | export APIMESH_OUTPUT_FILEPATH="$CURRENT_DIR/apimesh/swagger.json" 149 | 150 | python3 -m apimesh.apimesh.swagger_generation_cli "$OPENAI_API_KEY" "$PROJECT_API_KEY" "$AI_CHAT_ID" 151 | 152 | CLI_EXIT_CODE=$? 153 | 154 | echo "" 155 | echo "Swagger generation finished with status $CLI_EXIT_CODE." 
156 | 
157 | # Cleanup will happen automatically via trap
158 | exit "$CLI_EXIT_CODE"
159 | 
--------------------------------------------------------------------------------
/endpoints_extractor.py:
--------------------------------------------------------------------------------
1 | import ast
2 | from llm_client import OpenAiClient
3 | from config import Configurations
4 | import prompts
5 | from concurrent.futures import ThreadPoolExecutor, as_completed
6 | import time
7 | 
8 | config = Configurations()
9 | 
10 | class EndpointsExtractor:
11 |     def __init__(self):
12 |         self.openai_client = OpenAiClient()
13 | 
14 |     def extract_endpoints_with_gpt(self, file_path, framework):
15 |         print("\n***************************************************")
16 |         print(f"Started finding endpoints for {file_path}")
17 |         with open(file_path, 'r', encoding='utf-8') as file:
18 |             file_content = file.read()
19 |         if framework == "ruby_on_rails":
20 |             content = prompts.ruby_on_rails_endpoint_extractor_prompt.format(file_content = file_content)
21 |             messages = [
22 |                 {"role": "system", "content": prompts.ruby_on_rails_endpoint_extractor_system_prompt},
23 |                 {"role": "user", "content": content}
24 |             ]
25 |         elif framework == "express":
26 |             content = prompts.express_endpoint_extractor_prompt.format(file_content = file_content)
27 |             messages = [
28 |                 {"role": "system", "content": prompts.express_endpoint_extractor_system_prompt},
29 |                 {"role": "user", "content": content}
30 |             ]
31 |         elif framework == "django":
32 |             content = prompts.django_endpoint_extractor_prompt.format(file_content = file_content)
33 |             messages = [
34 |                 {"role": "system", "content": prompts.django_endpoint_extractor_system_prompt},
35 |                 {"role": "user", "content": content}
36 |             ]
37 | 
38 |         elif framework == "flask":
39 |             content = prompts.flask_endpoint_extractor_prompt.format(file_content = file_content)
40 |             messages = [
41 |                 {"role": "system", "content": prompts.flask_endpoint_extractor_system_prompt},
42 |                 {"role": "user", "content": content}
43 |             ]
44 |         elif framework == "fastapi":
45 |             content = prompts.fastapi_endpoint_extractor_prompt.format(file_content = file_content)
46 |             messages = [
47 |                 {"role": "system", "content": prompts.fastapi_endpoint_extractor_system_prompt},
48 |                 {"role": "user", "content": content}
49 |             ]
50 | 
51 |         elif framework == "golang":
52 |             content = prompts.golang_endpoint_extractor_prompt.format(file_content=file_content)
53 |             messages = [
54 |                 {"role": "system", "content": prompts.golang_endpoint_extractor_system_prompt},
55 |                 {"role": "user", "content": content}
56 |             ]
57 |         else: raise ValueError(f"Unsupported framework: {framework}")  # fail fast; `messages` would otherwise be unbound below
58 |         response = self.openai_client.call_chat_completion(messages=messages, temperature=0)
59 |         start = response.find('[')
60 |         end = response.rfind(']') + 1
61 |         json_like_string = response[start:end]
62 | 
63 |         try:
64 |             # Convert the JSON-like string to a Python list
65 |             parsed_list = ast.literal_eval(json_like_string)
66 |         except (ValueError, SyntaxError):
67 |             print("Error parsing JSON-like string from GPT response")
68 |             parsed_list = []
69 | 
70 |         print(f"Completed finding endpoints for {file_path}")
71 |         return parsed_list
72 | 
73 |     @staticmethod
74 |     def get_endpoint_related_information(faiss_vector_db, endpoints):
75 |         print("\n***************************************************")
76 |         print(f"Started generating endpoint related information for {len(endpoints)} endpoints")
77 |         start_time = time.time()
78 |         completed = 0
79 | 
80 |         def process_endpoint(endpoint):
81 |             query = f"This is the Method: {endpoint['method']}
and this is the Endpoint Path: {endpoint['path']} fetch the controller information for the endpoint."
82 |             docs = faiss_vector_db.similarity_search(str(query), k=4)
83 |             content_list = [doc.page_content.strip() for doc in docs]
84 |             return {'method': endpoint['method'], 'path': endpoint['path'], 'info': content_list}
85 | 
86 |         endpoint_related_content = []
87 |         with ThreadPoolExecutor(max_workers=8) as executor:
88 |             future_to_endpoint = {executor.submit(process_endpoint, endpoint): endpoint
89 |                                   for endpoint in endpoints}
90 | 
91 |             for future in as_completed(future_to_endpoint):
92 |                 endpoint_related_content.append(future.result())
93 |                 completed += 1
94 |                 end_time = time.time()
95 |                 print(
96 |                     f"Completed generating endpoint related information for {completed} endpoints in {int(end_time - start_time)} seconds",
97 |                     end="\r")
98 | 
99 |         return endpoint_related_content
100 | 
--------------------------------------------------------------------------------
/python_pipeline/identify_api_functions.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import ast
3 | import json
4 | 
5 | API_DECORATOR_NAMES = {
6 |     'route', 'get', 'post', 'put', 'delete', 'patch',
7 |     'api', 'endpoint', 'router', 'viewset', 'view'
8 | }
9 | 
10 | def has_api_decorator(decorator_node):
11 |     if isinstance(decorator_node, ast.Call) and hasattr(decorator_node.func, 'attr'):
12 |         if decorator_node.func.attr.lower() in API_DECORATOR_NAMES:
13 |             return True
14 |     if isinstance(decorator_node, ast.Attribute):
15 |         if decorator_node.attr.lower() in API_DECORATOR_NAMES:
16 |             return True
17 |     if isinstance(decorator_node, ast.Name):
18 |         if decorator_node.id.lower() in API_DECORATOR_NAMES:
19 |             return True
20 |     return False
21 | 
22 | 
23 | def extract_route_from_decorator(decorator_node):
24 |     if isinstance(decorator_node, ast.Call):
25 |         if decorator_node.args:
26 |             first_arg = decorator_node.args[0]
27 |             if isinstance(first_arg, ast.Constant):  # Python 3.8+; ast.Str is deprecated
28 |                 if isinstance(first_arg.value, str):
29 |                     return first_arg.value
30 |             elif isinstance(first_arg, ast.Str):  # legacy fallback for old ASTs
31 |                 return first_arg.s
32 |     return None
33 | 
34 | 
35 | def find_api_endpoints(file_path):
36 |     try:
37 |         source = file_path.read_text(encoding='utf-8')
38 |         tree = ast.parse(source, filename=str(file_path))
39 |     except Exception:
40 |         return []
41 |     set_parents(tree)  # this function re-parses, so parent links must be set on this tree, not by callers
42 |     endpoints = []
43 |     class_endpoints = {}
44 |     for node in ast.walk(tree):
45 |         if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and not isinstance(getattr(node, 'parent', None), ast.ClassDef):
46 |             for dec in node.decorator_list:
47 |                 if has_api_decorator(dec):
48 |                     route = extract_route_from_decorator(dec)
49 |                     endpoints.append({
50 |                         "type": "function",
51 |                         "name": node.name,
52 |                         "start_line": node.lineno,
53 |                         "end_line": getattr(node, 'end_lineno', None),
54 |                         "route": route,
55 |                         "file_path": str(file_path)
56 |                     })
57 |         if isinstance(node, ast.ClassDef):
58 |             class_has_decorator = any(has_api_decorator(dec) for dec in node.decorator_list)
59 |             class_route = None
60 |             for dec in node.decorator_list:
61 |                 if has_api_decorator(dec):
62 |                     class_route = extract_route_from_decorator(dec)
63 |                     break
64 |             if class_has_decorator:
65 |                 class_endpoint = {
66 |                     "type": "class",
67 |                     "name": node.name,
68 |                     "start_line": node.lineno,
69 |                     "end_line": getattr(node, 'end_lineno', None),
70 |                     "route": class_route,
71 |                     "file_path": str(file_path),
72 |                     "methods": []
73 |                 }
74 |                 class_endpoints[node.name] = class_endpoint
75 | 
endpoints.append(class_endpoint) 76 | for body_item in node.body: 77 | if isinstance(body_item, (ast.FunctionDef, ast.AsyncFunctionDef)): 78 | method_route = None 79 | method_has_decorator = any(has_api_decorator(dec) for dec in body_item.decorator_list) 80 | if method_has_decorator: 81 | for dec in body_item.decorator_list: 82 | if has_api_decorator(dec): 83 | method_route = extract_route_from_decorator(dec) 84 | if method_route: 85 | break 86 | if method_has_decorator or class_has_decorator: 87 | method_entry = { 88 | "type": "method", 89 | "name": body_item.name, 90 | "start_line": body_item.lineno, 91 | "end_line": getattr(body_item, 'end_lineno', None), 92 | "route": method_route if method_route else class_route, 93 | "file_path": str(file_path) 94 | } 95 | if node.name in class_endpoints: 96 | class_endpoints[node.name]["methods"].append(method_entry) 97 | return endpoints 98 | 99 | 100 | def set_parents(tree): 101 | for node in ast.walk(tree): 102 | for child in ast.iter_child_nodes(node): 103 | child.parent = node 104 | 105 | 106 | if __name__ == "__main__": 107 | api_files = ['/Users/ankits/PycharmProjects/data-science-model-serving/app.py', '/Users/ankits/PycharmProjects/data-science-model-serving/apps/training/run.py', '/Users/ankits/PycharmProjects/data-science-model-serving/apps/prediction/run.py'] 108 | py_files = [Path(file) for file in api_files] # Convert to Path objects 109 | all_endpoints = [] 110 | for py_file in py_files: 111 | try: 112 | source = py_file.read_text(encoding="utf-8") 113 | tree = ast.parse(source) 114 | set_parents(tree) 115 | eps = find_api_endpoints(py_file) 116 | if eps: 117 | all_endpoints.extend(eps) 118 | except Exception: 119 | continue 120 | print(json.dumps(all_endpoints, indent=2)) -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 
14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | support@qodex.ai. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. 
Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), [version 2.0](https://www.contributor-covenant.org/version/2/0/code_of_conduct.html). 118 | 119 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 120 | enforcement ladder](https://github.com/mozilla/diversity). 121 | 122 | 123 | - For answers to common questions about this code of conduct, see the FAQs at 124 | https://www.contributor-covenant.org/faq. 125 | - Translations are available at 126 | https://www.contributor-covenant.org/translations. 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ApiMesh: Code to OpenAPI Docs, Instantly 2 | 3 | [![Python Version](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/) 4 | [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE) 5 | [![Docker Build](https://img.shields.io/badge/docker%20build-passing-22c55e?logo=docker&logoColor=white)](https://github.com/qodex-ai/apimesh/actions/workflows/docker-build.yml) 6 | [![Tutorial](https://img.shields.io/badge/%F0%9F%93%96%20tutorial-get%20started-brightgreen.svg)](#quick-start-30-seconds) 7 | [![Examples](https://img.shields.io/badge/%F0%9F%9A%80%20examples-ready--to--run-orange.svg)](#quick-start-30-seconds) 8 | [![Discord](https://img.shields.io/badge/Discord-Join%20Community-5865f2?logo=discord&logoColor=white)](https://discord.gg/MHDayrP7) 9 | [![Twitter](https://img.shields.io/badge/Twitter-Follow%20Updates-1da1f2?logo=x&logoColor=white)](https://x.com/qodex_ai) 10 | 11 | **Open-Source OpenAPI Generator** – Automatically scan your codebase, generate **accurate OpenAPI 3.0 specs**, and render a **beautiful interactive HTML API UI** for instant exploration. 12 | 13 | **From code to live API docs in seconds** — no manual writing, no drift, no hassle. 14 | 15 | --- 16 | 17 | ## Overview 18 | 19 | **ApiMesh** is the AI-powered open-source tool that: 20 | 21 | - Scans your codebase automatically. 22 | - Discovers all REST API endpoints, parameters, auth, and schemas. 23 | - Generates a **valid `swagger.json` (OpenAPI 3.0)**. 24 | - **Renders `apimesh-docs.html`** — a **fully interactive API UI** powered by Swagger UI. 25 | 26 | ![img.png](img.png) 27 | > **Open the HTML file in any browser. No server. No setup. 
Just click and explore.**
28 | 
29 | ---
30 | 
31 | ### ✨ Key Features
32 | 
33 | | Feature | Benefit |
34 | |-------|--------|
35 | | 🔍 **Smart Code Discovery** | Finds endpoints across frameworks — no annotations needed |
36 | | 📄 **OpenAPI 3.0 Spec** | `swagger.json` ready for CI/CD, gateways, and tools |
37 | | 🌐 **Interactive HTML UI** | `apimesh-docs.html` — **instant API playground** with try-it-out |
38 | | 🌍 **Multi-Language** | Python, Node.js, Ruby on Rails, Go, and more |
39 | | ⚡ **Zero Config Start** | One command → full docs + UI |
40 | | 📱 **Self-Contained HTML** | Share via email, GitHub, or CDN — works offline |
41 | 
42 | ---
43 | 
44 | ### 🧠 How It Works
45 | 
46 | A **precise, AI-augmented pipeline** ensures reliable, up-to-date docs:
47 | 
48 | 1. **Scan Repo** → `FileScanner` walks your code (respects `.gitignore` + `config.yml`)
49 | 2. **Detect Framework** → Heuristics + LLM identify Express, FastAPI, Rails, etc.
50 | 3. **Harvest Endpoints** → Native parsers + LLM extract routes, methods, schemas
51 | 4. **Enrich Context** → Vector embeddings pull auth, models, examples per endpoint
52 | 5. **Generate Spec** → `swagger.json` built with OpenAI precision
53 | 6. **Render UI** → **`apimesh-docs.html`** embedded with **Swagger UI** — fully interactive
54 | 7. **Optional Sync** → Push to **Qodex.ai** for auto-tests and security scans
55 | 
56 | ---
57 | 
58 | ### 🌐 Supported Languages & Frameworks
59 | 
60 | | Language | Frameworks | Detection Method |
61 | |--------|------------|------------------|
62 | | **Python** | Django, Flask, FastAPI, DRF | Route files + decorators |
63 | | **Node.js / TS** | Express, NestJS | `app.get`, `Router`, decorators |
64 | | **Ruby on Rails** | Rails | `routes.rb` + controllers |
65 | | **Go** | Gin, Echo, Fiber, Chi, Gorilla Mux, net/http | Tree-sitter router analysis |
66 | | **Java, etc.** | Any REST | LLM fallback + patterns |
67 | 
68 | > Add custom patterns in `config.yml` — PRs welcome!
69 | 
70 | ---
71 | 
72 | ### 📂 Output Files
73 | 
74 | | File | Location | Purpose |
75 | |------|----------|--------|
76 | | `swagger.json` | `apimesh/swagger.json` | OpenAPI 3.0 spec |
77 | | **`apimesh-docs.html`** | `apimesh/apimesh-docs.html` | **Interactive API UI** — open in browser |
78 | | `config.json` | `apimesh/config.json` | Persisted CLI configuration (repo path, host, API keys) |
79 | | `config.yml` | Repo root | Customize scan, host, ignores |
80 | 
81 | > **Deploy `apimesh-docs.html` to GitHub Pages, Netlify, or Vercel in 1 click.**
82 | 
83 | ---
84 | 
85 | ## Quick Start (30 Seconds)
86 | 
87 | ### Option 1: Docker (Recommended)
88 | 
89 | Navigate to your repository
90 | ```bash
91 | cd /path/to/your/repo
92 | ```
93 | 
94 | Run interactively; it will prompt for any missing inputs
95 | ```bash
96 | docker run --pull always -it --rm -v $(pwd):/workspace qodexai/apimesh:latest
97 | ```
98 | 
99 | ### Option 2: Using MCP
100 | 
101 | Download the MCP server file
102 | 
103 | ```bash
104 | curl https://raw.githubusercontent.com/qodex-ai/apimesh/main/swagger_mcp.py -o swagger_mcp.py
105 | ```
106 | 
107 | Add this to your MCP settings
108 | ```json
109 | {
110 |   "mcpServers": {
111 |     "apimesh": {
112 |       "command": "uv",
113 |       "args": ["run", "/path/to/swagger_mcp/swagger_mcp.py"]
114 |     }
115 |   }
116 | }
117 | ```
118 | 
119 | Replace /path/to/swagger_mcp/swagger_mcp.py with the actual file path.
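
To sanity-check the server before wiring it into a client, you can launch it directly. This assumes `uv` is on your PATH; a plain `python swagger_mcp.py` inside an environment with the dependencies installed should behave the same:

```bash
uv run /path/to/swagger_mcp/swagger_mcp.py
```

If it starts cleanly it should sit waiting for an MCP client on stdin/stdout; stop it with Ctrl+C.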
120 | 
121 | 
122 | ### Option 3: Curl
123 | 
124 | Navigate to your repository
125 | ```bash
126 | cd /path/to/your/repo
127 | ```
128 | 
129 | Inside your repo root
130 | ```bash
131 | mkdir -p apimesh && \
132 | curl -sSL https://raw.githubusercontent.com/qodex-ai/apimesh/refs/heads/main/run.sh -o apimesh/run.sh && \
133 | chmod +x apimesh/run.sh && apimesh/run.sh
134 | ```
135 | 
136 | > Each run leaves `swagger.json`, `apimesh-docs.html`, `run.sh`, and `config.json` side-by-side inside the `apimesh/` workspace folder.
137 | 
138 | ---
139 | 
140 | ## 🤝 Contributing
141 | 
142 | Contributions are welcome!
143 | 
144 | Open an issue for bugs, feature requests, or improvements.
145 | 
146 | Submit PRs to enhance language/framework coverage.
147 | 
148 | Help us make API documentation automatic and effortless 🚀
149 | 
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to Swagger Generator
2 | 
3 | First off — thanks for taking the time to contribute! 🎉
4 | This document explains how to propose changes, report issues, and help improve the project.
5 | 
6 | > **Project summary:** Swagger Generator analyzes a codebase and produces an OpenAPI (Swagger) JSON. You can run it via a one-liner shell script or as an MCP server. See the [README](./README.md) for setup and usage details.
7 | 
8 | ---
9 | 
10 | ## 📜 Code of Conduct
11 | 
12 | By participating, you agree to uphold our [Code of Conduct](./CODE_OF_CONDUCT.md).
13 | If you witness or experience unacceptable behavior, please report it per that document.
14 | 
15 | ## 🔒 Security
16 | 
17 | Please **do not** open public issues for security vulnerabilities.
18 | Follow the responsible disclosure process in our [Security Policy](./security.md).
19 | 
20 | ## 🪪 License
21 | 
22 | By contributing, you agree that your contributions will be licensed under the
23 | [AGPL-3.0 License](./LICENSE.md).
24 | 
25 | ---
26 | 
27 | ## 🧭 How to Contribute
28 | 
29 | ### 1) Report bugs & request features
30 | - Search existing [Issues](https://github.com/qodex-ai/apimesh/issues) first.
31 | - If none exist, open a new issue with:
32 |   - **What happened** and **what you expected**
33 |   - **Steps to reproduce** (repo, command, flags, logs)
34 |   - Environment details (OS, Python version, shell)
35 | 
36 | ### 2) Propose improvements
37 | - For larger changes, open an issue first to discuss design/approach.
38 | - Small fixes (typos, docs, comments) can go straight to a PR.
39 | 
40 | ---
41 | 
42 | ## 🛠️ Development Setup
43 | 
44 | > The repo primarily contains Python and a couple of shell scripts. You can run the tool either via the helper script or directly as an MCP server.
45 | 
46 | ### Prerequisites
47 | - A recent Python 3.x
48 | - Git + a shell (bash/zsh)
49 | - (Optional) [uv](https://docs.astral.sh/uv/) or a virtual environment tool
50 | 
51 | ### Get the code
52 | ```bash
53 | git clone https://github.com/qodex-ai/apimesh.git
54 | cd apimesh
55 | ```
56 | 
57 | ### Running the generator (two common paths)
58 | 
59 | **A) One-liner script (quickest)**
60 | Create a dedicated `apimesh` workspace folder inside the repo you want to analyze.
61 | ```bash
62 | # Fetch and run the helper script from the repo root (see README for the latest command/flags)
63 | mkdir -p apimesh
64 | curl -sSL https://raw.githubusercontent.com/qodex-ai/apimesh/refs/heads/main/run.sh -o apimesh/run.sh
65 | chmod +x apimesh/run.sh
66 | # run.sh treats the current directory as the repo to analyze
67 | apimesh/run.sh
--project-api-key {project_api_key} --ai-chat-id {ai_chat_id} 68 | ``` 69 | 70 | > After completion you should always see `config.json`, `swagger.json`, `apimesh-docs.html`, and `run.sh` inside your repo's `apimesh/` workspace. 71 | 72 | > The bootstrap helper removes its temporary clone and virtual environment after it finishes generating docs, so rerun the snippet whenever you need to refresh the output. 73 | 74 | **B) Run as an MCP server** 75 | ```bash 76 | # Fetch the MCP server file if needed 77 | # (If you already have it locally from the clone, point to that path instead) 78 | wget https://raw.githubusercontent.com/qodex-ai/apimesh/main/swagger_mcp.py -O swagger_mcp.py 79 | 80 | # Example MCP client config snippet (adjust path/command to your setup) 81 | # { 82 | # "mcpServers": { 83 | # "apimesh": { 84 | # "command": "uv", 85 | # "args": ["run", "/absolute/path/to/swagger_mcp.py"] 86 | # } 87 | # } 88 | # } 89 | ``` 90 | 91 | > After running, you should see a `swagger.json` emitted in the target repo path. 92 | 93 | --- 94 | 95 | ## 🧹 Style, Linting & Commit Messages 96 | 97 | We aim for clear, readable Python and tidy shell scripts. 98 | 99 | - **Python** 100 | - Prefer small, focused functions. 101 | - Add docstrings and inline comments where logic is non-obvious. 102 | - Keep imports organized and avoid unused imports. 103 | - **Shell** 104 | - Use `set -euo pipefail` for robustness when appropriate. 105 | - Quote variables; avoid bashisms if not needed. 106 | 107 | **Commit messages** 108 | - Use present tense and be descriptive: 109 | `feat: add repository path validation`, `fix: handle empty swagger output`, `docs: clarify MCP setup` 110 | - Reference issues when applicable: `Fixes #123` 111 | 112 | --- 113 | 114 | ## ✅ Pull Request Checklist 115 | 116 | Before you open a PR: 117 | 118 | - [ ] The change is documented (README or inline comments as needed). 119 | - [ ] Scripts still work (`run.sh`, `bootstrap_mcp_runner.sh` if applicable). 120 | - [ ] Any new flags or behavior are reflected in the README examples. 121 | - [ ] Code is reasonably linted/typed (if you added type hints). 122 | - [ ] Tests added or manual test steps documented (see below). 123 | - [ ] No secrets or API keys committed. 124 | 125 | Open your PR against the `main` branch and fill out the template (or describe): 126 | - **What** the change does 127 | - **Why** it’s needed 128 | - **How** you validated it 129 | 130 | --- 131 | 132 | ## 🧪 Testing Changes 133 | 134 | This project currently relies primarily on **manual validation**. Please include a short note in your PR describing how you tested: 135 | 136 | **Suggested manual test flow** 137 | 1. Choose a small public repo with a few HTTP endpoints (or a simple local sample). 138 | 2. Run the generator using your change (script or MCP path). 139 | 3. Verify a `swagger.json` was generated. 140 | 4. Open it in Swagger UI / an OpenAPI viewer to confirm endpoints, paths, and schemas look correct. 141 | 5. Try edge cases your change might affect (e.g., unusual file layout, multiple languages, missing dependencies). 142 | 143 | If you add unit tests: 144 | - Place them under a `tests/` folder. 145 | - Keep tests hermetic; avoid requiring network access whenever possible. 146 | 147 | --- 148 | 149 | ## 🧱 Project Structure (high level) 150 | 151 | - `swagger_mcp.py` — MCP server entry and core orchestration. 152 | - `legacy_swagger_pipeline.py`, `run.sh`, `bootstrap_mcp_runner.sh` — runner/helper scripts. 
153 | - `ruby_dependencies.py` — language-specific helpers (example). 154 | - `README.md`, `CODE_OF_CONDUCT.md`, `security.md`, `LICENSE.md` — docs & policies. 155 | 156 | (Filenames can evolve; check the tree for the latest layout.) 157 | 158 | --- 159 | 160 | ## 🗣️ Communication 161 | 162 | - Use GitHub Issues for bugs and feature requests. 163 | - Use PR comments for code review discussions. 164 | - Be respectful, constructive, and kind (see [Code of Conduct](./CODE_OF_CONDUCT.md)). 165 | 166 | --- 167 | 168 | ## 🙏 Acknowledgements 169 | 170 | Thanks for improving Swagger Generator! Every issue, PR, and suggestion helps make the tool better for everyone. 171 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import tiktoken 2 | import subprocess 3 | import os 4 | import re 5 | 6 | def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int: 7 | encoding = tiktoken.get_encoding(encoding_name) 8 | return len(encoding.encode(string)) 9 | 10 | def get_repo_path() -> str: 11 | """ 12 | Get the repository path from APIMESH_USER_REPO_PATH environment variable. 13 | 14 | Returns: 15 | Repository path as a string (assumes APIMESH_USER_REPO_PATH is always set). 16 | """ 17 | repo_path = os.environ["APIMESH_USER_REPO_PATH"] 18 | return os.path.abspath(repo_path) 19 | 20 | def get_repo_name() -> str: 21 | """ 22 | Get the repository name from git remote URL. 23 | 24 | Returns: 25 | Repository name extracted from git remote URL, or basename of path if git remote is not available. 26 | """ 27 | repo_path = get_repo_path() 28 | try: 29 | original_dir = os.getcwd() 30 | try: 31 | os.chdir(repo_path) 32 | result = subprocess.run( 33 | ['git', 'remote', 'get-url', 'origin'], 34 | capture_output=True, 35 | text=True, 36 | timeout=5, 37 | check=False 38 | ) 39 | os.chdir(original_dir) 40 | except Exception: 41 | os.chdir(original_dir) 42 | return os.path.basename(repo_path) 43 | 44 | if result.returncode == 0 and result.stdout: 45 | remote_url = result.stdout.strip() 46 | # Extract repo name from various git URL formats 47 | # SSH: git@github.com:owner/repo.git -> repo 48 | # HTTPS: https://github.com/owner/repo.git -> repo 49 | ssh_pattern = r'git@[^:]+:(?:[^/]+/)?([^/]+?)(?:\.git)?$' 50 | https_pattern = r'https?://(?:[^@]+@)?[^/]+/[^/]+/([^/]+?)(?:\.git)?$' 51 | 52 | ssh_match = re.match(ssh_pattern, remote_url) 53 | if ssh_match: 54 | return ssh_match.group(1) 55 | 56 | https_match = re.match(https_pattern, remote_url) 57 | if https_match: 58 | return https_match.group(1) 59 | 60 | # Fallback to basename if git remote doesn't match expected patterns 61 | return os.path.basename(repo_path) 62 | except Exception: 63 | # Fallback to basename if any error occurs 64 | return os.path.basename(repo_path) 65 | 66 | def format_repo_name(repo_name: str) -> str: 67 | """ 68 | Format repository name for display. 69 | Converts snake_case, kebab-case, or camelCase to Title Case with spaces. 
70 | 71 | Examples: 72 | sample_rails_app -> Sample Rails App 73 | sample-rails-app -> Sample Rails App 74 | sampleRailsApp -> Sample Rails App 75 | 76 | Args: 77 | repo_name: Raw repository name 78 | 79 | Returns: 80 | Formatted repository name in Title Case 81 | """ 82 | # Replace underscores and hyphens with spaces 83 | formatted = repo_name.replace('_', ' ').replace('-', ' ') 84 | 85 | # Insert spaces before capital letters (for camelCase) 86 | formatted = re.sub(r'([a-z])([A-Z])', r'\1 \2', formatted) 87 | 88 | # Convert to title case (capitalize first letter of each word) 89 | formatted = formatted.title() 90 | 91 | return formatted 92 | 93 | def get_output_filepath() -> str: 94 | """ 95 | Get the output filepath from APIMESH_OUTPUT_FILEPATH environment variable. 96 | If not set, defaults to {repo_path}/apimesh/swagger.json 97 | 98 | Returns: 99 | Output filepath as a string. 100 | """ 101 | output_filepath = os.environ.get("APIMESH_OUTPUT_FILEPATH") 102 | if output_filepath: 103 | return os.path.abspath(output_filepath) 104 | # Default to repo_path/apimesh/swagger.json 105 | repo_path = get_repo_path() 106 | default_path = os.path.join(repo_path, "apimesh", "swagger.json") 107 | return os.path.abspath(default_path) 108 | 109 | def get_github_repo_url() -> str: 110 | """ 111 | Get the GitHub repository URL from git remote. 112 | Uses APIMESH_USER_REPO_PATH environment variable to determine the repository path. 113 | 114 | Returns: 115 | GitHub repository URL (e.g., "https://github.com/owner/repo") or empty string if not available. 116 | """ 117 | try: 118 | repo_path = get_repo_path() 119 | original_dir = os.getcwd() 120 | try: 121 | os.chdir(repo_path) 122 | result = subprocess.run( 123 | ['git', 'remote', 'get-url', 'origin'], 124 | capture_output=True, 125 | text=True, 126 | timeout=5, 127 | check=False 128 | ) 129 | os.chdir(original_dir) 130 | except Exception: 131 | os.chdir(original_dir) 132 | return "" 133 | 134 | if result.returncode == 0 and result.stdout: 135 | remote_url = result.stdout.strip() 136 | # Convert SSH format (git@github.com:owner/repo.git) to HTTPS format 137 | # or extract from HTTPS format (https://github.com/owner/repo.git) 138 | ssh_pattern = r'git@github\.com:(.+?)(?:\.git)?$' 139 | https_pattern = r'https?://(?:www\.)?github\.com/(.+?)(?:\.git)?$' 140 | 141 | ssh_match = re.match(ssh_pattern, remote_url) 142 | if ssh_match: 143 | owner_repo = ssh_match.group(1) 144 | return f"https://github.com/{owner_repo}" 145 | 146 | https_match = re.match(https_pattern, remote_url) 147 | if https_match: 148 | owner_repo = https_match.group(1) 149 | return f"https://github.com/{owner_repo}" 150 | 151 | # Return as-is if it doesn't match GitHub patterns 152 | return remote_url 153 | 154 | return "" 155 | except Exception: 156 | return "" 157 | 158 | def get_git_commit_hash() -> str: 159 | """ 160 | Get the current git commit hash for the repository. 161 | Uses APIMESH_USER_REPO_PATH environment variable to determine the repository path. 162 | 163 | Returns: 164 | Git commit hash as a string, or empty string if not available. 
165 | """ 166 | try: 167 | repo_path = get_repo_path() 168 | # Change to repo directory for git command 169 | original_dir = os.getcwd() 170 | try: 171 | os.chdir(repo_path) 172 | result = subprocess.run( 173 | ['git', 'rev-parse', 'HEAD'], 174 | capture_output=True, 175 | text=True, 176 | timeout=5, 177 | check=False 178 | ) 179 | os.chdir(original_dir) 180 | except Exception: 181 | os.chdir(original_dir) 182 | return "" 183 | 184 | if result.returncode == 0 and result.stdout: 185 | return result.stdout.strip() 186 | return "" 187 | except Exception: 188 | return "" -------------------------------------------------------------------------------- /rails_pipeline/generate_file_information.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict, List, Optional 3 | 4 | from tree_sitter import Language, Parser 5 | import tree_sitter_ruby 6 | 7 | from config import Configurations 8 | 9 | config = Configurations() 10 | 11 | RUBY_LANGUAGE = Language(tree_sitter_ruby.language()) 12 | parser = Parser(RUBY_LANGUAGE) 13 | 14 | 15 | def parse_file(filename: str): 16 | with open(filename, "r", encoding="utf-8") as f: 17 | code = f.read() 18 | tree = parser.parse(code.encode("utf-8")) 19 | return tree, code 20 | 21 | 22 | def _node_text(source: str, node) -> str: 23 | return source[node.start_byte : node.end_byte] 24 | 25 | 26 | def _gather_class_info(node, source: str) -> Dict: 27 | name_node = node.child_by_field_name("name") 28 | name = _node_text(source, name_node) if name_node else "" 29 | superclass_node = node.child_by_field_name("superclass") 30 | superclass = None 31 | if superclass_node: 32 | superclass = _node_text(source, superclass_node).strip() 33 | if superclass: 34 | superclass = superclass.lstrip("<").strip() 35 | return { 36 | "type": "class", 37 | "name": name, 38 | "start_line": node.start_point[0] + 1, 39 | "end_line": node.end_point[0] + 1, 40 | "superclass": superclass, 41 | } 42 | 43 | 44 | def _gather_module_info(node, source: str) -> Dict: 45 | name_node = node.child_by_field_name("name") 46 | name = _node_text(source, name_node) if name_node else "" 47 | return { 48 | "type": "module", 49 | "name": name, 50 | "start_line": node.start_point[0] + 1, 51 | "end_line": node.end_point[0] + 1, 52 | } 53 | 54 | 55 | def _gather_method_info(node, source: str) -> Dict: 56 | name_node = node.child_by_field_name("name") 57 | name = _node_text(source, name_node) if name_node else "" 58 | return { 59 | "type": "function", 60 | "name": name, 61 | "start_line": node.start_point[0] + 1, 62 | "end_line": node.end_point[0] + 1, 63 | } 64 | 65 | 66 | def _gather_call_info(node, source: str) -> Dict: 67 | name_node = node.child_by_field_name("method") 68 | if not name_node: 69 | name_node = node.child_by_field_name("name") 70 | name = _node_text(source, name_node) if name_node else "" 71 | call_info = { 72 | "type": "function_call", 73 | "name": name, 74 | "start_line": node.start_point[0] + 1, 75 | "end_line": node.end_point[0] + 1, 76 | } 77 | 78 | definition_range = _infer_definition_range(node, source) 79 | if definition_range: 80 | call_info.update(definition_range) 81 | return call_info 82 | 83 | 84 | def _infer_definition_range(node, source: str) -> Optional[Dict]: 85 | """ 86 | Attempt to infer the definition range for an inline function call by 87 | locating the matching method definition within the same source buffer. 
88 | """ 89 | name_node = node.child_by_field_name("method") 90 | if not name_node: 91 | name_node = node.child_by_field_name("name") 92 | if not name_node: 93 | return None 94 | 95 | name = _node_text(source, name_node) 96 | # This heuristic scans siblings in the same scope looking for `def name`. 97 | parent = node.parent 98 | while parent is not None: 99 | for sibling in parent.children: 100 | if sibling.type in {"method", "singleton_method"}: 101 | method_name_node = sibling.child_by_field_name("name") 102 | if method_name_node and _node_text(source, method_name_node) == name: 103 | return { 104 | "function_start_line": sibling.start_point[0] + 1, 105 | "function_end_line": sibling.end_point[0] + 1, 106 | } 107 | parent = parent.parent 108 | return None 109 | 110 | 111 | def _gather_import_info(node, source: str, base_directory: str) -> Optional[Dict]: 112 | method_node = node.child_by_field_name("method") 113 | if not method_node: 114 | return None 115 | 116 | method_name = _node_text(source, method_node) 117 | if method_name not in {"require", "require_relative"}: 118 | return None 119 | 120 | arguments_node = node.child_by_field_name("arguments") 121 | if arguments_node is None or len(arguments_node.children) == 0: 122 | return None 123 | 124 | literal = None 125 | for child in arguments_node.children: 126 | if child.type == "string": 127 | content = child.child_by_field_name("content") 128 | if content: 129 | literal = _node_text(source, content) 130 | break 131 | if child.type == "symbol_literal": 132 | sym = child.child_by_field_name("name") 133 | if sym: 134 | literal = _node_text(source, sym) 135 | break 136 | 137 | if literal is None: 138 | return None 139 | 140 | origin = _resolve_required_path( 141 | literal, base_directory, method_name == "require_relative" 142 | ) 143 | 144 | return { 145 | "type": "import", 146 | "imported_name": literal, 147 | "from_module": literal, 148 | "origin": origin, 149 | "line": node.start_point[0] + 1, 150 | "path_exists": origin is not None and os.path.exists(origin), 151 | "usage_lines": [], 152 | } 153 | 154 | 155 | def _resolve_required_path( 156 | literal: str, base_directory: str, is_relative: bool 157 | ) -> Optional[str]: 158 | if is_relative: 159 | candidate = os.path.normpath(os.path.join(base_directory, f"{literal}.rb")) 160 | if os.path.exists(candidate): 161 | return candidate 162 | else: 163 | candidate = os.path.join(base_directory, f"{literal}.rb") 164 | if os.path.exists(candidate): 165 | return os.path.normpath(candidate) 166 | return None 167 | 168 | 169 | def get_elements(tree, source: str, base_directory: str) -> Dict: 170 | elements = { 171 | "classes": [], 172 | "modules": [], 173 | "functions": [], 174 | "function_calls": [], 175 | } 176 | imports: List[Dict] = [] 177 | 178 | cursor = [tree.root_node] 179 | while cursor: 180 | node = cursor.pop() 181 | node_type = node.type 182 | if node_type == "class": 183 | elements["classes"].append(_gather_class_info(node, source)) 184 | elif node_type == "module": 185 | elements["modules"].append(_gather_module_info(node, source)) 186 | elif node_type in {"method", "singleton_method"}: 187 | elements["functions"].append(_gather_method_info(node, source)) 188 | elif node_type in {"call", "command", "command_call"}: 189 | elements["function_calls"].append(_gather_call_info(node, source)) 190 | 191 | import_info = _gather_import_info(node, source, base_directory) 192 | if import_info: 193 | imports.append(import_info) 194 | 195 | cursor.extend(list(node.children)) 196 | 197 | return 
elements, imports 198 | 199 | 200 | def process_file(filename: str, base_directory: Optional[str] = None) -> Dict: 201 | if not base_directory: 202 | base_directory = os.path.dirname(filename) 203 | 204 | tree, code = parse_file(filename) 205 | elements, imports = get_elements(tree, code, base_directory) 206 | return {"filename": filename, "elements": elements, "imports": imports} 207 | -------------------------------------------------------------------------------- /swagger_generation_cli.py: -------------------------------------------------------------------------------- 1 | import traceback 2 | import os 3 | 4 | from user_config import UserConfigurations 5 | from swagger_generator import SwaggerGeneration 6 | from file_scanner import FileScanner 7 | from framework_identifier import FrameworkIdentifier 8 | from endpoints_extractor import EndpointsExtractor 9 | from faiss_index_generator import GenerateFaissIndex 10 | from nodejs_pipeline.run_swagger_generation import run_swagger_generation as nodejs_swagger_generator 11 | from python_pipeline.run_swagger_generation import run_swagger_generation as python_swagger_generator 12 | from rails_pipeline.run_swagger_generation import run_swagger_generation as ruby_on_rails_swagger_generator 13 | from golang_pipeline.run_swagger_generation import run_swagger_generation as golang_swagger_generator 14 | from utils import get_output_filepath 15 | import requests, json 16 | import sys 17 | 18 | class RunSwagger: 19 | def __init__(self, project_api_key, openai_api_key, ai_chat_id, is_mcp): 20 | self.ai_chat_id = ai_chat_id 21 | self.user_configurations = UserConfigurations(project_api_key, openai_api_key, ai_chat_id, is_mcp) 22 | self.user_config = self.user_configurations.load_user_config() 23 | self.framework_identifier = FrameworkIdentifier() 24 | self.file_scanner = FileScanner() 25 | self.endpoints_extractor = EndpointsExtractor() 26 | self.faiss_index = GenerateFaissIndex() 27 | self.swagger_generator = SwaggerGeneration() 28 | 29 | 30 | def run_python_nodejs_ruby(self, framework): 31 | swagger = None 32 | try: 33 | if framework == "django" or framework == "flask" or framework == "fastapi": 34 | swagger = python_swagger_generator(self.user_config['api_host']) 35 | elif framework == "express" or framework == "nestjs": 36 | swagger = nodejs_swagger_generator(self.user_config['api_host']) 37 | elif framework == "ruby_on_rails": 38 | swagger = ruby_on_rails_swagger_generator(self.user_config['api_host']) 39 | elif framework == "golang": 40 | swagger = golang_swagger_generator(self.user_config['api_host']) 41 | except Exception as ex: 42 | traceback.print_exc() 43 | print("Fallback to old procedure") 44 | return swagger 45 | 46 | def _resolve_ai_chat_id(self, ai_chat_id): 47 | candidate = (ai_chat_id or "").strip() 48 | if candidate and candidate.lower() != "null": 49 | return candidate 50 | return self.user_config.get("ai_chat_id", "null") 51 | 52 | def run(self, ai_chat_id=None): 53 | resolved_ai_chat_id = self._resolve_ai_chat_id(ai_chat_id if ai_chat_id is not None else self.ai_chat_id) 54 | try: 55 | file_paths = self.file_scanner.get_all_file_paths() 56 | print("\n***************************************************") 57 | if self.user_config.get('framework', None): 58 | print(f"Using Existing Framework - {self.user_config['framework']}") 59 | framework = self.user_config.get('framework', "") 60 | else: 61 | print("Started framework identification") 62 | framework = self.framework_identifier.get_framework(file_paths)['framework'] 63 | 
self.user_config['framework'] = framework 64 | self.user_configurations.save_user_config(self.user_config) 65 | except Exception as ex: 66 | msg = str(ex) 67 | lowered = msg.lower() 68 | if "insufficient_quota" in lowered or "quota" in lowered: 69 | print("OpenAI quota exceeded. Please check your plan/billing and retry after adding credits.") 70 | else: 71 | print("We do not support this framework currently. Please contact QodexAI support.") 72 | exit() 73 | print(f"completed framework identification - {framework}") 74 | print("\n***************************************************") 75 | print("Started finding files related to API information") 76 | try: 77 | swagger = self.run_python_nodejs_ruby(framework) 78 | if swagger: 79 | output_filepath = get_output_filepath() 80 | self.swagger_generator.save_swagger_json(swagger, output_filepath) 81 | #self.upload_swagger_to_qodex(resolved_ai_chat_id) 82 | exit() 83 | api_files = self.file_scanner.find_api_files(file_paths, framework) 84 | print("Completed finding files related to API information") 85 | all_endpoints = [] 86 | for filePath in api_files: 87 | endpoints = self.endpoints_extractor.extract_endpoints_with_gpt(filePath, framework) 88 | all_endpoints.extend(endpoints) 89 | print("\n***************************************************") 90 | print("Started creating faiss index for all files") 91 | faiss_vector = self.faiss_index.create_faiss_index(file_paths, framework) 92 | print("Completed creating faiss index for all files") 93 | print("Fetching authentication related information") 94 | authentication_information = self.faiss_index.get_authentication_related_information(faiss_vector) 95 | print("Completed Fetching authentication related information") 96 | endpoint_related_information = self.endpoints_extractor.get_endpoint_related_information(faiss_vector, all_endpoints) 97 | swagger = self.swagger_generator.create_swagger_json(endpoint_related_information, authentication_information, framework, self.user_config['api_host']) 98 | except Exception as ex: 99 | traceback.print_exc() 100 | print("Oops! looks like we encountered an issue. Please try after some time.") 101 | exit() 102 | try: 103 | output_filepath = get_output_filepath() 104 | self.swagger_generator.save_swagger_json(swagger, output_filepath) 105 | except Exception as ex: 106 | print("Swagger was not able to be saved. Please check your project api key and try again.") 107 | #self.upload_swagger_to_qodex(resolved_ai_chat_id) 108 | return 109 | 110 | 111 | def upload_swagger_to_qodex(self, ai_chat_id): 112 | qodex_api_key = self.user_config['qodex_api_key'] 113 | if qodex_api_key: 114 | print("Uploading swagger to Qodex.AI") 115 | url = "https://api.app.qodex.ai/api/v1/collection_imports/create_with_json" 116 | output_filepath = get_output_filepath() 117 | with open(output_filepath, "r") as file: 118 | swagger_doc = json.load(file) 119 | payload = { 120 | "api_key": qodex_api_key, 121 | "swagger_doc": swagger_doc, 122 | "ai_chat_id": ai_chat_id 123 | } 124 | response = requests.post(url, json=payload) 125 | 126 | # Check the response 127 | if response.status_code == 200 or response.status_code == 201: 128 | print("Success:", response.json()) # Or response.text for plain text responses 129 | print("Swagger successfully uploaded to Qodex AI. 
Please refresh your tab.") 130 | print("We highly recommend you to review the apis before generating test scenarios.") 131 | if str(ai_chat_id) != 'null': 132 | print("Open the following link in your browser or refresh the existing open page to continue further") 133 | print(f"https://app.qodex.ai/ai-agent?chatId={ai_chat_id}") 134 | else: 135 | print(f"Failed with status code {response.status_code}: {response.text}") 136 | return 137 | 138 | 139 | openai_api_key = sys.argv[1] if len(sys.argv) > 1 else "" 140 | project_api_key = sys.argv[2] if len(sys.argv) > 2 else "" 141 | ai_chat_id = sys.argv[3] if len(sys.argv) > 3 else "" 142 | is_mcp = sys.argv[4] if len(sys.argv) > 4 else False 143 | 144 | RunSwagger(project_api_key, openai_api_key, ai_chat_id, is_mcp).run(ai_chat_id) 145 | -------------------------------------------------------------------------------- /python_pipeline/run_swagger_generation.py: -------------------------------------------------------------------------------- 1 | import os, json, ast 2 | import shutil 3 | import datetime 4 | from pathlib import Path 5 | from python_pipeline.generate_file_information import process_file 6 | from python_pipeline.find_api_definition_files import find_api_definition_files 7 | from python_pipeline.identify_api_functions import set_parents, find_api_endpoints 8 | from config import Configurations 9 | from python_pipeline.definition_swagger_generator import get_function_definition_swagger 10 | from utils import get_git_commit_hash, get_github_repo_url, get_repo_path, get_repo_name 11 | 12 | config = Configurations() 13 | 14 | 15 | def should_process_directory(dir_path: str) -> bool: 16 | """ 17 | Check if a directory should be processed or ignored 18 | """ 19 | path_parts = dir_path.split(os.sep) 20 | return not any(part in config.ignored_dirs for part in path_parts) 21 | 22 | def run_swagger_generation(host): 23 | directory_path = get_repo_path() 24 | repo_name = get_repo_name() 25 | new_dir_name = "qodex_file_information" 26 | new_dir_path = os.path.join(directory_path, new_dir_name) 27 | os.makedirs(new_dir_path, exist_ok=True) 28 | for root, dirs, files in os.walk(directory_path): 29 | for file in files: 30 | file_path = os.path.join(root, file) 31 | if os.path.exists(file_path) and should_process_directory(str(file_path)) and file_path.endswith(".py"): 32 | file_info = process_file(file_path, directory_path) 33 | json_file_name = new_dir_path +"/"+ str(file_path).replace("/", "_q_").strip(".py") + ".json" 34 | with open(json_file_name, "w") as f: 35 | json.dump(file_info, f, indent=4) 36 | api_definition_files = find_api_definition_files(directory_path) 37 | all_endpoints_dict = dict() 38 | for file in api_definition_files: 39 | all_endpoints = [] 40 | py_file = Path(file) 41 | source = py_file.read_text(encoding="utf-8") 42 | tree = ast.parse(source) 43 | set_parents(tree) 44 | eps = find_api_endpoints(py_file) 45 | if eps: 46 | all_endpoints.extend(eps) 47 | all_endpoints_dict[file] = all_endpoints 48 | swagger = { 49 | "openapi": "3.0.0", 50 | "info": { 51 | "title": repo_name, 52 | "version": "1.0.0", 53 | "description": "This Swagger file was generated using OpenAI GPT.", 54 | "generated_at": datetime.datetime.utcnow().isoformat() + "Z", 55 | "commit_reference": get_git_commit_hash(), 56 | "github_repo_url": get_github_repo_url() 57 | }, 58 | "servers": [ 59 | { 60 | "url": host 61 | } 62 | ], 63 | "paths": {} 64 | } 65 | for key, value in all_endpoints_dict.items(): 66 | for item in value: 67 | if item['type'] == 'class': 68 | 
if item['methods']:
69 |                     for item1 in item['methods']:
70 |                         context_code_blocks, method_definition_code_block = provide_context_codeblock(directory_path, item1)
71 |                         swagger_for_def = get_function_definition_swagger(method_definition_code_block, context_code_blocks, item1['route'])
72 |                         key = list(swagger_for_def['paths'].keys())[0]
73 |                         if key not in swagger["paths"]:
74 |                             swagger["paths"][key] = {}
75 |                         _method_list = list(swagger_for_def['paths'][key].keys())
76 |                         if not _method_list:
77 |                             continue
78 |                         _method = _method_list[0]
79 |                         swagger["paths"][key][_method] = swagger_for_def['paths'][key][_method]
80 |             else:
81 |                 context_code_blocks, method_definition_code_block = provide_context_codeblock(directory_path, item)
82 |                 swagger_for_def = get_function_definition_swagger(method_definition_code_block, context_code_blocks, item['route'])
83 |                 key = list(swagger_for_def['paths'].keys())[0]
84 |                 if key not in swagger["paths"]:
85 |                     swagger["paths"][key] = {}
86 |                 _method_list = list(swagger_for_def['paths'][key].keys())
87 |                 if not _method_list:
88 |                     continue
89 |                 _method = _method_list[0]
90 |                 swagger["paths"][key][_method] = swagger_for_def['paths'][key][_method]
91 |     shutil.rmtree(new_dir_path)
92 |     return swagger
93 | 
94 | 
95 | def get_dependencies(data, start_line, end_line, file_path):
96 |     existing_function_names = [item['name'] for item in data['elements']['functions'] if item['name'] not in ['get', 'post', 'put', 'delete', 'patch']]
97 |     in_file_dependency_functions = []
98 |     for item in data['elements']['function_calls']:
99 |         if (item['name'] in existing_function_names) and item['start_line'] >= start_line and item['end_line'] <= end_line:
100 |             item['file_path'] = file_path
101 |             in_file_dependency_functions.append(item)
102 |     imported_functions = []
103 |     for item in data['imports']:
104 |         if not item['path_exists']:
105 |             continue
106 |         for k in item['usage_lines']:
107 |             if start_line <= k <= end_line:
108 |                 imported_functions.append(item)
109 |             if in_file_dependency_functions:
110 |                 for item1 in in_file_dependency_functions:
111 |                     if item1['start_line'] <= k <= item1['end_line'] and item not in imported_functions:
112 |                         imported_functions.append(item)
113 |     return in_file_dependency_functions, imported_functions
114 | 
115 | def get_code_blocks(in_file_dependency_functions, imported_functions, file_name, directory_path):
116 |     code_blocks = []
117 |     for block in in_file_dependency_functions:
118 |         with open(file_name, "r") as f:
119 |             lines = f.readlines()
120 |             f.close()
121 |         code_blocks.append(lines[block['function_start_line'] - 1 : block.get('function_end_line', block['function_start_line'])])  # slice the whole definition when its end line is recorded, not just the first line
122 |     for func in imported_functions:
123 |         visited = False
124 |         file_name = func['origin']
125 |         json_dir_path = directory_path + "/" + "qodex_file_information"
126 |         json_file = str(file_name).replace("/", "_q_").removesuffix(".py") + ".json"
127 |         complete_json_file_path = json_dir_path + "/" + json_file
128 |         with open(complete_json_file_path, "r") as f:
129 |             data = json.load(f)
130 |             f.close()
131 |         for item in data['elements']['classes']:
132 |             if item['name'] == func['imported_name']:
133 |                 visited = True
134 |                 with open(file_name, "r") as f:
135 |                     lines = f.readlines()
136 |                     f.close()
137 |                 code_blocks.append(lines[item['start_line']-1: item['end_line']])
138 |                 break
139 |         if not visited:
140 |             for item in data['elements']['functions']:
141 |                 if item['name'] == func['imported_name']:
142 |                     visited = True
143 |                     with open(file_name, "r") as f:
144 |                         lines = f.readlines()
145 |                         f.close()
146 | 
code_blocks.append(lines[item['start_line'] - 1: item['end_line']])
147 |                     break
148 |         if not visited:
149 |             for item in data['elements']['variables']:
150 |                 if item['name'] == func['imported_name']:
151 |                     with open(file_name, "r") as f:
152 |                         lines = f.readlines()
153 |                         f.close()
154 |                     code_blocks.append(lines[item['start_line'] - 1: item['end_line']])
155 |                     break
156 |     return code_blocks
157 | 
158 | 
159 | def provide_context_codeblock(directory_path, method_info):
160 |     file_name = method_info['file_path']
161 |     with open(method_info['file_path'], "r") as f:
162 |         lines = f.readlines()
163 |     method_definition_code_block = lines[method_info["start_line"]-1: method_info["end_line"]]
164 |     json_dir_path = directory_path + "/" + "qodex_file_information"
165 |     json_file = str(file_name).replace("/", "_q_").removesuffix(".py") + ".json"
166 |     complete_json_file_path = json_dir_path + "/" + json_file
167 |     with open(complete_json_file_path, "r") as f:
168 |         data = json.load(f)
169 |     in_file_dependency_functions, imported_functions = get_dependencies(data, method_info["start_line"], method_info["end_line"], method_info['file_path'])
170 |     context_code_blocks = get_code_blocks(in_file_dependency_functions, imported_functions, file_name, directory_path)
171 |     return context_code_blocks, method_definition_code_block
172 | 
173 | 
--------------------------------------------------------------------------------
/nodejs_pipeline/generate_file_information.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from tree_sitter import Language, Parser, QueryCursor
3 | import tree_sitter_javascript
4 | import tree_sitter_typescript
5 | import os
6 | import json
7 | from nodejs_pipeline.constants import (
8 |     TYPESCRIPT_FILE_EXTENSIONS,
9 |     TSX_FILE_EXTENSIONS,
10 | )
11 | 
12 | # Load JavaScript grammar
13 | JS_LANGUAGE = Language(tree_sitter_javascript.language())
14 | TS_LANGUAGE = Language(tree_sitter_typescript.language_typescript())
15 | TSX_LANGUAGE = Language(tree_sitter_typescript.language_tsx())
16 | 
17 | 
18 | def _get_language_for_file(filename: str) -> Language:
19 |     suffix = Path(filename).suffix.lower()
20 |     if suffix in TSX_FILE_EXTENSIONS and TSX_LANGUAGE:
21 |         return TSX_LANGUAGE
22 |     if suffix in TYPESCRIPT_FILE_EXTENSIONS and TS_LANGUAGE:
23 |         return TS_LANGUAGE
24 |     return JS_LANGUAGE
25 | 
26 | def parse_file(filename):
27 |     with open(filename, 'r', encoding='utf-8') as f:
28 |         code = f.read()
29 |     language = _get_language_for_file(filename)
30 |     parser = Parser(language)
31 |     tree = parser.parse(code.encode('utf-8'))
32 |     return tree, code, language
33 | 
34 | 
35 | def _parse_with_language(code: str, language: Language):
36 |     parser = Parser(language)
37 |     return parser.parse(code.encode('utf-8'))
38 | 
39 | def get_module_origin(module_name, base_directory):
40 |     """
41 |     Resolve JS import/require origin similar to Node.js module resolution.
42 | """ 43 | # Relative import 44 | if module_name.startswith("."): 45 | path = os.path.normpath(os.path.join(base_directory, module_name)) 46 | search_exts = [ 47 | ".ts", 48 | ".tsx", 49 | ".cts", 50 | ".mts", 51 | ".js", 52 | ".mjs", 53 | ".cjs", 54 | ".d.ts", 55 | "/index.ts", 56 | "/index.tsx", 57 | "/index.cts", 58 | "/index.mts", 59 | "/index.js", 60 | "/index.mjs", 61 | "/index.cjs", 62 | ] 63 | for ext in search_exts: 64 | candidate = path + ext 65 | if os.path.exists(candidate): 66 | return os.path.abspath(candidate) 67 | return None 68 | 69 | # Look in node_modules 70 | node_module_path = os.path.join(base_directory, "node_modules", module_name) 71 | if os.path.exists(node_module_path): 72 | return os.path.abspath(node_module_path) 73 | 74 | return "" 75 | 76 | def find_import_usages(tree, imported_names, language): 77 | """Find where imported identifiers are used.""" 78 | query = language.query(""" 79 | (identifier) @ident 80 | """) 81 | cursor = QueryCursor(query) 82 | captures = cursor.captures(tree.root_node) 83 | 84 | usages = {name: [] for name in imported_names} 85 | for node in captures.get("ident", []): 86 | name = node.text.decode("utf-8") 87 | if name in imported_names: 88 | line = node.start_point[0] + 1 89 | if line not in usages[name]: 90 | usages[name].append(line) 91 | return usages 92 | 93 | def get_elements(tree, code, base_directory, language): 94 | """ 95 | Extract classes, functions, variables, function calls, imports. 96 | """ 97 | query = language.query(""" 98 | (class_declaration 99 | name: (identifier) @class-name) @class 100 | 101 | (function_declaration 102 | name: (identifier) @func-name) @function 103 | 104 | (variable_declarator 105 | name: (identifier) @var-name) @variable 106 | 107 | (call_expression 108 | function: (identifier) @called-func) @func-call 109 | 110 | (call_expression 111 | function: (member_expression 112 | property: (property_identifier) @method-name)) @method-call 113 | 114 | ; ES6 imports 115 | (import_statement 116 | (import_clause (identifier) @imported-symbol)? 
117 | source: (string) @import-source) 118 | 119 | ; CommonJS require 120 | (variable_declarator 121 | name: (identifier) @var-name 122 | value: (call_expression 123 | function: (identifier) @require-func 124 | arguments: (arguments (string) @require-source) 125 | ) 126 | ) 127 | """) 128 | 129 | cursor = QueryCursor(query) 130 | captures = cursor.captures(tree.root_node) 131 | 132 | elements = { 133 | 'classes': [], 134 | 'functions': [], 135 | 'variables': [], 136 | 'function_calls': [], 137 | 'imports': [] 138 | } 139 | 140 | imported_names = set() 141 | 142 | # Collect symbols 143 | for node in captures.get("func-name", []): 144 | elements['functions'].append({ 145 | 'type': 'function', 146 | 'name': node.text.decode(), 147 | 'line': node.start_point[0] + 1, 148 | 'start_line': node.start_point[0] + 1, 149 | 'end_line': node.end_point[0]+1 150 | }) 151 | 152 | for node in captures.get("class-name", []): 153 | elements['classes'].append({ 154 | 'type': 'class', 155 | 'name': node.text.decode(), 156 | 'line': node.start_point[0] + 1, 157 | 'start_line': node.start_point[0] + 1, 158 | 'end_line': node.end_point[0]+1 159 | }) 160 | 161 | for node in captures.get("var-name", []): 162 | elements['variables'].append({ 163 | 'type': 'variable', 164 | 'name': node.text.decode(), 165 | 'line': node.start_point[0] + 1, 166 | 'start_line': node.start_point[0] + 1, 167 | 'end_line': node.end_point[0]+1 168 | }) 169 | 170 | for node in captures.get("called-func", []): 171 | elements['function_calls'].append({ 172 | 'type': 'function_call', 173 | 'name': node.text.decode(), 174 | 'line': node.start_point[0] + 1, 175 | 'start_line': node.start_point[0] + 1, 176 | 'end_line': node.end_point[0]+1 177 | }) 178 | 179 | for node in captures.get("method-name", []): 180 | elements['function_calls'].append({ 181 | 'type': 'method_call', 182 | 'name': node.text.decode(), 183 | 'line': node.start_point[0] + 1, 184 | 'start_line': node.start_point[0] + 1, 185 | 'end_line': node.end_point[0]+1 186 | }) 187 | 188 | # Handle imports 189 | sources = captures.get("import-source", []) + captures.get("require-source", []) 190 | imported_symbols = captures.get("imported-symbol", []) + captures.get("var-name", []) # align require names 191 | 192 | for i, source_node in enumerate(sources): 193 | module_name = source_node.text.decode().strip('"\'') 194 | origin = get_module_origin(module_name, base_directory) 195 | imported_name = None 196 | if i < len(imported_symbols): 197 | imported_name = imported_symbols[i].text.decode() 198 | imported_names.add(imported_name) 199 | 200 | elements['imports'].append({ 201 | 'type': 'import', 202 | 'imported_name': imported_name if imported_name else "require", 203 | 'from_module': module_name, 204 | 'origin': origin, 205 | 'line': source_node.start_point[0] + 1, 206 | 'path_exists': os.path.exists(origin) if origin and origin != "" else False, 207 | 'usage_lines': [] 208 | }) 209 | 210 | # Find import usages 211 | if imported_names: 212 | usages = find_import_usages(tree, imported_names, language) 213 | for imp in elements['imports']: 214 | name = imp['imported_name'] 215 | if name and name in usages: 216 | imp['usage_lines'] = list(set(usages[name]) - {imp['line']}) 217 | 218 | return elements 219 | 220 | def process_file(filename, base_directory=None): 221 | if not base_directory: 222 | base_directory = os.path.dirname(filename) 223 | 224 | tree, code, language = parse_file(filename) 225 | try: 226 | elements = get_elements(tree, code, base_directory, language) 227 | except Exception 
as ex:
228 |         suffix = Path(filename).suffix.lower()
229 |         if suffix in TYPESCRIPT_FILE_EXTENSIONS or suffix in TSX_FILE_EXTENSIONS:
230 |             # Fallback: try parsing with the JS grammar to salvage metadata for TS/TSX files that break the TS query
231 |             fallback_tree = _parse_with_language(code, JS_LANGUAGE)
232 |             elements = get_elements(fallback_tree, code, base_directory, JS_LANGUAGE)
233 |         else:
234 |             raise
235 | 
236 |     return {
237 |         'filename': filename,
238 |         'elements': elements
239 |     }
240 | 
241 | if __name__ == "__main__":
242 |     filename = "/Users/ankits/My-Favourite-Playlist/server.js"
243 |     base_directory = "/Users/ankits/My-Favourite-Playlist"
244 |     if os.path.exists(filename):
245 |         result = process_file(filename, base_directory)
246 |         print(json.dumps(result, indent=2))
247 |     else:
248 |         print(f"File {filename} not found")
249 | 
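
Before the Python extractor, a hedged usage sketch of the Node.js one above. The project paths are invented for illustration; any Express- or Nest-style repository behaves the same way:

from nodejs_pipeline.generate_file_information import process_file, get_module_origin

info = process_file("sample_app/src/routes/users.ts", base_directory="sample_app")
print([fn["name"] for fn in info["elements"]["functions"]])

# Relative specifiers are probed against the extension and index candidates in
# get_module_origin; bare specifiers fall back to a node_modules lookup.
print(get_module_origin("./controllers/userController", "sample_app/src/routes"))
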
--------------------------------------------------------------------------------
/python_pipeline/generate_file_information.py:
--------------------------------------------------------------------------------
1 | from tree_sitter import Language, Parser, QueryCursor
2 | import tree_sitter_python
3 | import ast
4 | import importlib.util
5 | import os
6 | import sys
7 | from config import Configurations
8 | 
9 | config = Configurations()
10 | 
11 | PY_LANGUAGE = Language(tree_sitter_python.language())
12 | 
13 | parser = Parser(PY_LANGUAGE)
14 | 
15 | 
16 | def parse_file(filename):
17 |     with open(filename, 'r', encoding='utf-8') as f:
18 |         code = f.read()
19 |     tree = parser.parse(code.encode('utf-8'))
20 |     return tree, code
21 | 
22 | 
23 | def get_module_origin(module_name, base_directory=None):
24 |     try:
25 |         original_path = sys.path.copy()
26 |         if base_directory and base_directory not in sys.path:
27 |             sys.path.insert(0, base_directory)
28 | 
29 |         spec = importlib.util.find_spec(module_name)
30 |         if spec and spec.origin:
31 |             return spec.origin
32 |         elif spec is None:
33 |             return ""
34 |     except Exception:
35 |         pass
36 |     finally:
37 |         sys.path = original_path
38 | 
39 |     if base_directory and "." in module_name:
40 |         parts = module_name.split(".")
41 |         potential_path = os.path.join(base_directory, *parts)
42 |         for ext in (".py", "/__init__.py"):
43 |             candidate = potential_path + ext
44 |             if os.path.exists(candidate):
45 |                 return candidate
46 |     return None
47 | 
48 | 
49 | def find_import_usages(tree, imported_names):
50 |     """Find lines where imported names are used in the code."""
51 |     query = PY_LANGUAGE.query("""
52 |     (identifier) @ident
53 |     """)
54 | 
55 |     cursor = QueryCursor(query)
56 |     captures = cursor.captures(tree.root_node)
57 | 
58 |     usages = {name: [] for name in imported_names}
59 | 
60 |     for node in captures.get("ident", []):
61 |         name = node.text.decode("utf-8")
62 |         if name in imported_names:
63 |             line = node.start_point[0] + 1
64 |             if line not in usages[name]:  # Avoid duplicates
65 |                 usages[name].append(line)
66 | 
67 |     return usages
68 | 
69 | 
70 | def analyze_imports(filepath, base_directory=None, tree=None):
71 |     imports = []
72 |     imported_names = set()  # Track imported names for usage lookup
73 |     try:
74 |         with open(filepath, "r", encoding="utf-8") as f:
75 |             source = f.read()
76 |         tree_ast = ast.parse(source, filename=filepath)
77 | 
78 |         for node in ast.walk(tree_ast):
79 |             if isinstance(node, ast.ImportFrom):
80 |                 module = node.module
81 |                 if module is None:
82 |                     continue  # skip relative imports
83 |                 origin = get_module_origin(module, base_directory)
84 |                 for alias in node.names:
85 |                     name = alias.asname if alias.asname else alias.name
86 |                     imported_names.add(name)
87 |                     imports.append({
88 |                         'type': 'import', 'asname': alias.asname,
89 |                         'imported_name': alias.name,
90 |                         'from_module': module,
91 |                         'origin': origin,
92 |                         'line': node.lineno,
93 |                         'path_exists': False,  # Will be updated later
94 |                         'usage_lines': []  # Will be populated later
95 |                     })
96 |             elif isinstance(node, ast.Import):
97 |                 for alias in node.names:
98 |                     name = alias.asname if alias.asname else alias.name
99 |                     imported_names.add(name)
100 |                     origin = get_module_origin(alias.name, base_directory)
101 |                     imports.append({
102 |                         'type': 'import', 'asname': alias.asname,
103 |                         'imported_name': alias.name,
104 |                         'from_module': None,
105 |                         'origin': origin,
106 |                         'line': node.lineno,
107 |                         'path_exists': False,  # Will be updated later
108 |                         'usage_lines': []  # Will be populated later
109 |                     })
110 |     except Exception as e:
111 |         print(f"Error analyzing imports in {filepath}: {str(e)}")
112 | 
113 |     # Find where imported names are used (keyed by the local binding, i.e. the alias when present)
114 |     if tree and imported_names:
115 |         usages = find_import_usages(tree, imported_names)
116 |         for import_item in imports:
117 |             name = import_item['imported_name']
118 |             if import_item.get('asname'):
119 |                 name = import_item['asname']
120 |             import_item['usage_lines'] = list(set(usages.get(name, [])) - set([import_item['line']]))
121 | 
122 |     return imports
123 | 
124 | 
125 | def get_elements(tree):
126 |     query = PY_LANGUAGE.query("""
127 |     (class_definition
128 |         name: (identifier) @class-name) @class
129 |     (function_definition
130 |         name: (identifier) @func-name) @function
131 |     (assignment
132 |         left: (identifier) @var-name) @variable
133 |     (call
134 |         function: (identifier) @called-func) @func-call
135 |     (call
136 |         function: (attribute
137 |             attribute: (identifier) @method-name)) @method-call
138 |     (import_statement
139 |         name: (dotted_name (identifier) @imported-func))
140 |     (import_from_statement
141 |         name: (dotted_name (identifier) @imported-func))
142 |     """)
143 | 
144 |     cursor = QueryCursor(query)
145 |     captures = cursor.captures(tree.root_node)
146 | 
147 |     elements = {
148 |         'classes': [],
149 |         'functions': [],
150 |         'variables': [],
151 |         'function_calls': [],
152 |     }
153 | 
154 |     # Collect function definitions for cross-referencing
155 |     function_defs = {}
156 |     for node in captures.get("func-name", []):
157 |         func_name = node.text.decode("utf-8")
158 |         elements['functions'].append({
159 |             'type': 'function',
160 |             'name': func_name,
161 |             'start_line': node.start_point[0] + 1,
162 |             'end_line': node.parent.end_point[0] + 1
163 |         })
164 |         function_defs[func_name] = {
165 |             'start_line': node.start_point[0] + 1,
166 |             'end_line': node.parent.end_point[0] + 1
167 |         }
168 | 
169 |     for node in captures.get("class-name", []):
170 |         elements['classes'].append({
171 |             'type': 'class',
172 |             'name': node.text.decode("utf-8"),
173 |             'start_line': node.start_point[0] + 1,
174 |             'end_line': node.parent.end_point[0] + 1
175 |         })
176 | 
177 |     for node in captures.get("var-name", []):
178 |         elements['variables'].append({
179 |             'type': 'variable',
180 |             'name': node.text.decode("utf-8"),
181 |             'start_line': node.start_point[0] + 1,
182 |             'end_line': node.parent.end_point[0] + 1
183 |         })
184 | 
185 |     for node in captures.get("called-func", []):
186 |         func_name = node.text.decode("utf-8")
187 |         call_info = {
188 |             'type': 'function_call',
189 |             'name': func_name,
190 |             'start_line': node.start_point[0] + 1,
191 |             'end_line': node.parent.end_point[0] + 1
192 |         }
193 |         if func_name in function_defs:
194 |             call_info['function_start_line'] = function_defs[func_name]['start_line']
195 |             call_info['function_end_line'] = function_defs[func_name]['end_line']
196 |         elements['function_calls'].append(call_info)
197 | 
198 |     for node in captures.get("method-name", []):
199 |         method_name = node.text.decode("utf-8")
200 |         call_info = {
201 |             'type': 'function_call',
202 |             'name': method_name,
203 |             'start_line': node.start_point[0] + 1,
204 |             'end_line': node.parent.end_point[0] + 1
205 |         }
206 |         if method_name in function_defs:
207 |             call_info['function_start_line'] = function_defs[method_name]['start_line']
208 |             call_info['function_end_line'] = function_defs[method_name]['end_line']
209 |         elements['function_calls'].append(call_info)
210 |     return elements
211 | 
212 | 
213 | def check_path_exists(imports, base_directory):
214 |     for import_item in imports:
215 |         origin = import_item.get('origin')
216 |         if origin and origin != "" and os.path.isabs(origin):
217 |             try:
218 |                 origin = os.path.normpath(origin)
219 |                 base_directory = os.path.normpath(base_directory)
220 |                 if os.path.exists(origin):
221 |                     common_prefix = os.path.commonpath([origin, base_directory])
222 |                     import_item['path_exists'] = common_prefix == base_directory or origin.startswith(base_directory)
223 |                 else:
224 |                     import_item['path_exists'] = False
225 |             except Exception:
226 |                 import_item['path_exists'] = False
227 |         else:
228 |             import_item['path_exists'] = False
229 |     return imports
230 | 
231 | 
232 | def process_file(filename, base_directory=None):
233 |     if not base_directory:
234 |         base_directory = os.path.dirname(filename)
235 |     tree, code = parse_file(filename)
236 |     elements = get_elements(tree)
237 |     imports = analyze_imports(filename, base_directory, tree)
238 |     imports = check_path_exists(imports, base_directory)
239 |     return {
240 |         'filename': filename,
241 |         'elements': elements,
242 |         'imports': imports
243 |     }
244 | 
245 | 
246 | def should_process_directory(dir_path: str) -> bool:
247 |     """
248 |     Check if a directory should be processed or ignored.
249 |     """
250 |     path_parts = dir_path.split(os.sep)
251 |     return not any(part in config.ignored_dirs for part in path_parts)
252 | 
253 | 
254 | if __name__ == "__main__":
255 |     import json
256 |     filename = "/Users/ankits/PycharmProjects/qpulse-backend/python_scripts/interactive_ai_agent/tools/get_test_scenario_tags.py"
257 |     base_directory = "/Users/ankits/PycharmProjects/qpulse-backend"
258 |     if os.path.exists(filename) and should_process_directory(filename) and filename.endswith(".py"):
259 |         result = process_file(filename, base_directory)
260 |         print(json.dumps(result, indent=2))
261 |     else:
262 |         print(f"File {filename} not found")
263 | 
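
A hedged usage sketch of the Python extractor above, showing the fields that downstream stages consume; the repository paths are invented:

from python_pipeline.generate_file_information import process_file

result = process_file("sample_app/api/views.py", base_directory="sample_app")
for imp in result["imports"]:
    if imp["path_exists"]:  # in-repo import, so it is eligible as LLM context
        print(imp["imported_name"], "->", imp["origin"], imp["usage_lines"])
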
--------------------------------------------------------------------------------
/golang_pipeline/generate_file_information.py:
--------------------------------------------------------------------------------
1 | import os
2 | from typing import Dict, List, Optional
3 | 
4 | from tree_sitter import Language, Parser, QueryCursor
5 | import tree_sitter_go
6 | 
7 | from config import Configurations
8 | 
9 | config = Configurations()
10 | 
11 | GO_LANGUAGE = Language(tree_sitter_go.language())
12 | parser = Parser(GO_LANGUAGE)
13 | _MODULE_NAME_CACHE: Dict[str, Optional[str]] = {}
14 | 
15 | 
16 | def parse_file(filename: str):
17 |     with open(filename, "r", encoding="utf-8") as f:
18 |         code = f.read()
19 |     tree = parser.parse(code.encode("utf-8"))
20 |     return tree, code
21 | 
22 | 
23 | def _node_text(source: str, node) -> str:
24 |     return source[node.start_byte : node.end_byte]
25 | 
26 | 
27 | def _strip_quotes(value: Optional[str]) -> str:
28 |     if not value:
29 |         return ""
30 |     value = value.strip()
31 |     if value.startswith(("`", '"')) and value.endswith(("`", '"')):
32 |         return value[1:-1]
33 |     return value
34 | 
35 | 
36 | def _get_module_name(base_directory: str) -> Optional[str]:
37 |     # Membership test (not .get()) so a cached None ("no go.mod found") also short-circuits.
38 |     if base_directory in _MODULE_NAME_CACHE:
39 |         return _MODULE_NAME_CACHE[base_directory]
40 |     module_name = None
41 |     go_mod_path = os.path.join(base_directory, "go.mod")
42 |     try:
43 |         with open(go_mod_path, "r", encoding="utf-8") as f:
44 |             for line in f:
45 |                 line = line.strip()
46 |                 if line.startswith("module "):
47 |                     parts = line.split()
48 |                     if len(parts) >= 2:
49 |                         module_name = parts[1]
50 |                     break
51 |     except OSError:
52 |         module_name = None
53 |     _MODULE_NAME_CACHE[base_directory] = module_name
54 |     return module_name
55 | 
56 | 
57 | def _resolve_import_origin(import_path: str, base_directory: Optional[str]) -> Optional[str]:
58 |     if not base_directory:
59 |         return None
60 |     normalized = _strip_quotes(import_path)
61 |     if not normalized:
62 |         return None
63 |     segments = [segment for segment in normalized.split("/") if segment]
64 |     candidate = os.path.join(base_directory, *segments)
65 |     if os.path.isdir(candidate):
66 |         return os.path.normpath(candidate)
67 |     go_file = f"{candidate}.go"
68 |     if os.path.exists(go_file):
69 |         return os.path.normpath(go_file)
70 |     module_name = _get_module_name(base_directory)
71 |     if module_name and normalized.startswith(module_name):
72 |         rel_path = normalized[len(module_name) :].lstrip("/")
73 |         if rel_path:
74 |             module_candidate = os.path.join(base_directory, rel_path)
75 |             if os.path.isdir(module_candidate):
76 |                 return os.path.normpath(module_candidate)
77 |             module_go = f"{module_candidate}.go"
78 |             if os.path.exists(module_go):
79 |                 return os.path.normpath(module_go)
80 |     return None
81 | 
82 | 
83 | def _collect_functions(root, source: str, file_path: str) -> List[Dict]:
84 |     functions: List[Dict] = []
85 |     stack = [root]
86 |     while stack:
87 |         node = stack.pop()
88 |         if node.type in {"function_declaration", "method_declaration"}:
89 | 
name_node = node.child_by_field_name("name") 90 | if not name_node: 91 | stack.extend(list(node.children)) 92 | continue 93 | func_name = _node_text(source, name_node) 94 | receiver_node = node.child_by_field_name("receiver") 95 | receiver = _node_text(source, receiver_node).strip() if receiver_node else None 96 | functions.append( 97 | { 98 | "type": "function", 99 | "name": func_name, 100 | "receiver": receiver, 101 | "start_line": node.start_point[0] + 1, 102 | "end_line": node.end_point[0] + 1, 103 | "file_path": file_path, 104 | } 105 | ) 106 | stack.extend(list(node.children)) 107 | return functions 108 | 109 | 110 | def _collect_types(root, source: str, file_path: str) -> List[Dict]: 111 | types: List[Dict] = [] 112 | stack = [root] 113 | while stack: 114 | node = stack.pop() 115 | if node.type == "type_spec": 116 | name_node = node.child_by_field_name("name") 117 | type_node = node.child_by_field_name("type") 118 | if not name_node or not type_node: 119 | stack.extend(list(node.children)) 120 | continue 121 | type_name = _node_text(source, name_node) 122 | types.append( 123 | { 124 | "type": "type", 125 | "name": type_name, 126 | "start_line": node.start_point[0] + 1, 127 | "end_line": node.end_point[0] + 1, 128 | "type_kind": type_node.type, 129 | "file_path": file_path, 130 | } 131 | ) 132 | stack.extend(list(node.children)) 133 | return types 134 | 135 | 136 | def _extract_call_name(function_node, source: str) -> Optional[str]: 137 | if function_node is None: 138 | return None 139 | if function_node.type == "identifier": 140 | return _node_text(source, function_node) 141 | if function_node.type == "selector_expression": 142 | field_node = function_node.child_by_field_name("field") 143 | if field_node: 144 | return _node_text(source, field_node) 145 | return None 146 | 147 | 148 | def _collect_function_calls(root, source: str) -> List[Dict]: 149 | calls: List[Dict] = [] 150 | stack = [root] 151 | while stack: 152 | node = stack.pop() 153 | if node.type == "call_expression": 154 | function_node = node.child_by_field_name("function") 155 | call_name = _extract_call_name(function_node, source) 156 | if call_name: 157 | calls.append( 158 | { 159 | "type": "function_call", 160 | "name": call_name, 161 | "full_name": _node_text(source, function_node), 162 | "start_line": node.start_point[0] + 1, 163 | "end_line": node.end_point[0] + 1, 164 | } 165 | ) 166 | stack.extend(list(node.children)) 167 | return calls 168 | 169 | 170 | def _collect_imports(root, source: str, base_directory: Optional[str]) -> List[Dict]: 171 | imports: List[Dict] = [] 172 | stack = [root] 173 | while stack: 174 | node = stack.pop() 175 | if node.type == "import_declaration": 176 | for child in node.named_children: 177 | if child.type != "import_spec": 178 | continue 179 | path_node = child.child_by_field_name("path") 180 | if not path_node: 181 | continue 182 | raw_path = _node_text(source, path_node) 183 | path_value = _strip_quotes(raw_path) 184 | alias_node = child.child_by_field_name("name") 185 | alias = _node_text(source, alias_node) if alias_node else None 186 | imported_name = alias or (path_value.split("/")[-1] if path_value else None) 187 | origin = _resolve_import_origin(path_value, base_directory) 188 | imports.append( 189 | { 190 | "type": "import", 191 | "imported_name": imported_name, 192 | "alias": alias, 193 | "from_module": path_value, 194 | "origin": origin, 195 | "line": child.start_point[0] + 1, 196 | "path_exists": bool(origin and os.path.exists(origin)), 197 | "usage_lines": [], 198 | } 
199 |                 )
200 |         stack.extend(list(node.children))
201 |     return imports
202 | 
203 | 
204 | def _annotate_import_usages(tree, source: str, imports: List[Dict]) -> None:
205 |     alias_map = {}
206 |     for item in imports:
207 |         alias_key = item.get("alias") or item.get("imported_name")
208 |         if alias_key and alias_key not in {"_", "."}:
209 |             alias_map[alias_key] = item
210 |     if not alias_map:
211 |         return
212 |     query = GO_LANGUAGE.query("(identifier) @ident")
213 |     cursor = QueryCursor(query)
214 |     captures = cursor.captures(tree.root_node)
215 |     for node in captures.get("ident", []):
216 |         ident = node.text.decode("utf-8")
217 |         import_entry = alias_map.get(ident)
218 |         if not import_entry:
219 |             continue
220 |         line = node.start_point[0] + 1
221 |         if line == import_entry["line"]:
222 |             continue
223 |         usage_lines = import_entry.setdefault("usage_lines", [])
224 |         if line not in usage_lines:
225 |             usage_lines.append(line)
226 | 
227 | 
228 | def _attach_call_ranges(functions: List[Dict], calls: List[Dict]) -> None:
229 |     functions_by_name: Dict[str, Dict] = {}
230 |     for func in functions:
231 |         functions_by_name.setdefault(func["name"], func)
232 |     for call in calls:
233 |         target = functions_by_name.get(call["name"])
234 |         if not target:
235 |             continue
236 |         call["function_start_line"] = target["start_line"]
237 |         call["function_end_line"] = target["end_line"]
238 | 
239 | 
240 | def get_elements(tree, source: str, base_directory: str):  # returns (elements, imports)
241 |     elements: Dict = {
242 |         "functions": [],
243 |         "function_calls": [],
244 |         "types": [],
245 |     }
246 |     functions = _collect_functions(tree.root_node, source, "")
247 |     calls = _collect_function_calls(tree.root_node, source)
248 |     _attach_call_ranges(functions, calls)
249 |     elements["functions"] = functions
250 |     elements["function_calls"] = calls
251 |     elements["types"] = _collect_types(tree.root_node, source, "")
252 |     imports = _collect_imports(tree.root_node, source, base_directory)
253 |     _annotate_import_usages(tree, source, imports)
254 |     return elements, imports
255 | 
256 | 
257 | def process_file(filename: str, base_directory: Optional[str] = None) -> Dict:
258 |     if not base_directory:
259 |         base_directory = os.path.dirname(filename)
260 |     tree, source = parse_file(filename)
261 |     elements, imports = get_elements(tree, source, base_directory)
262 |     # Ensure file_path for functions is populated after parsing.
263 |     for func in elements.get("functions", []):
264 |         func["file_path"] = filename
265 |     for type_entry in elements.get("types", []):
266 |         type_entry["file_path"] = filename
267 |     return {"filename": filename, "elements": elements, "imports": imports}
268 | 
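
A short illustration of the go.mod fallback in _resolve_import_origin above; the module path and import spec are invented:

# Suppose go.mod declares `module example.com/myapp`; an import of
# "example.com/myapp/internal/handlers" is then mapped back onto the repo tree.
module_name = "example.com/myapp"                       # parsed by _get_module_name
import_path = "example.com/myapp/internal/handlers"     # hypothetical import spec
rel_path = import_path[len(module_name):].lstrip("/")   # -> "internal/handlers"
# The resolver then probes <base_directory>/internal/handlers as a package
# directory, or <base_directory>/internal/handlers.go as a single file.
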
--------------------------------------------------------------------------------
/swagger_generator.py:
--------------------------------------------------------------------------------
1 | from llm_client import OpenAiClient
2 | from config import Configurations
3 | import prompts
4 | from concurrent.futures import ThreadPoolExecutor, as_completed
5 | import json
6 | import time
7 | import os, re
8 | import datetime
9 | from utils import get_git_commit_hash, get_github_repo_url, get_repo_path, get_repo_name, format_repo_name
10 | 
11 | config = Configurations()
12 | 
13 | class SwaggerGeneration:
14 |     def __init__(self):
15 |         self.openai_client = OpenAiClient()
16 | 
17 | 
18 |     def create_swagger_json(self, endpoints, authentication_information, framework, api_host):
19 |         repo_path = get_repo_path()
20 |         repo_name = get_repo_name()
21 |         swagger = {
22 |             "openapi": "3.0.0",
23 |             "info": {
24 |                 "title": repo_name,
25 |                 "version": "1.0.0",
26 |                 "description": "This Swagger file was generated using OpenAI GPT.",
27 |                 "generated_at": datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z"),
28 |                 "commit_reference": get_git_commit_hash(),
29 |                 "github_repo_url": get_github_repo_url()
30 |             },
31 |             "servers": [
32 |                 {
33 |                     "url": api_host
34 |                 }
35 |             ],
36 |             "paths": {}
37 |         }
38 |         print("\n***************************************************")
39 |         print(f"\nstarted generating swagger for {len(endpoints)} endpoints")
40 |         start_time = time.time()
41 |         completed = 0
42 | 
43 |         def process_endpoint(endpoint):
44 |             endpoint_swagger = self.generate_endpoint_swagger(endpoint, authentication_information, framework)
45 |             return endpoint["path"], endpoint["method"].lower(), endpoint_swagger
46 | 
47 |         with ThreadPoolExecutor(max_workers=8) as executor:
48 |             future_to_endpoint = {executor.submit(process_endpoint, endpoint): endpoint
49 |                                   for endpoint in endpoints}
50 | 
51 |             for future in as_completed(future_to_endpoint):
52 |                 path, _, endpoint_swagger = future.result()
53 | 
54 |                 if path not in swagger["paths"]:
55 |                     swagger["paths"][path] = {}
56 | 
57 |                 key = list(endpoint_swagger['paths'].keys())[0]
58 |                 _method_list = list(endpoint_swagger['paths'][key].keys())
59 |                 if not _method_list:
60 |                     continue
61 |                 _method = _method_list[0]
62 |                 swagger["paths"][path][_method] = endpoint_swagger['paths'][key][_method]
63 | 
64 |                 completed += 1
65 |                 end_time = time.time()
66 |                 print(f"completed generating swagger for {completed} endpoints in {int(end_time - start_time)} seconds",
67 |                       end="\r")
68 |         return swagger
69 | 
70 | 
71 | 
72 |     def generate_endpoint_swagger(self, endpoint, authentication_information, framework):
73 |         if framework == "ruby_on_rails":
74 |             prompt = prompts.ruby_on_rails_swagger_generation_prompt.format(endpoint_info = endpoint['info'], endpoint_method = endpoint['method'], endpoint_path = endpoint['path'],
75 |                                                                             authentication_information = authentication_information)
76 |         else:
77 |             prompt = prompts.generic_swagger_generation_prompt.format(endpoint_info = endpoint['info'], endpoint_method = endpoint['method'], endpoint_path = endpoint['path'],
78 |                                                                       authentication_information = authentication_information)
79 |         messages = [
80 |             {"role": "system", "content": prompts.swagger_generation_system_prompt},
81 |             {"role": "user", "content": prompt}
82 |         ]
83 | 
response_content = self.openai_client.call_chat_completion(messages=messages) 84 | try: 85 | start_index = response_content.find('{') 86 | end_index = response_content.rfind('}') 87 | swagger_json_block = response_content[start_index:end_index + 1] 88 | return json.loads(swagger_json_block) 89 | except Exception as ex: 90 | return {"paths": {endpoint['path']: {}}} 91 | 92 | 93 | @staticmethod 94 | def save_swagger_json(swagger, filename): 95 | """ 96 | Saves the Swagger JSON to a file. 97 | 98 | Args: 99 | swagger (dict): The Swagger JSON dictionary. 100 | filename (str): The output file name. 101 | """ 102 | swagger = SwaggerGeneration._sanitize_swagger(swagger) 103 | # Create directory if it doesn't exist 104 | directory = os.path.dirname(filename) 105 | if directory: 106 | os.makedirs(directory, exist_ok=True) 107 | with open(filename, 'w', encoding='utf-8') as file: 108 | json.dump(swagger, file, indent=2) 109 | # Display relative path (remove /workspace prefix if present) 110 | display_path = filename 111 | if filename.startswith('/workspace/'): 112 | display_path = filename[len('/workspace/'):] 113 | if not display_path.startswith('./'): 114 | display_path = './' + display_path 115 | print(f"Swagger JSON saved to {display_path}.") 116 | # Generate HTML viewer file in the same directory 117 | SwaggerGeneration.generate_html_viewer(filename) 118 | 119 | @staticmethod 120 | def generate_html_viewer(swagger_json_path): 121 | """ 122 | Generates an HTML viewer file in the same directory as the swagger.json file. 123 | Embeds the swagger.json data directly into the HTML to avoid CORS issues. 124 | 125 | Args: 126 | swagger_json_path (str): Path to the swagger.json file. 127 | """ 128 | try: 129 | # Get the directory of the swagger.json file 130 | swagger_dir = os.path.dirname(swagger_json_path) 131 | if not swagger_dir: 132 | swagger_dir = '.' 133 | 134 | # Path to the HTML viewer template 135 | html_template_path = os.path.join(os.path.dirname(__file__), 'apimesh-docs.html') 136 | html_output_path = os.path.join(swagger_dir, 'apimesh-docs.html') 137 | 138 | # Read the swagger.json file 139 | swagger_data = None 140 | if os.path.exists(swagger_json_path): 141 | with open(swagger_json_path, 'r', encoding='utf-8') as f: 142 | swagger_data = json.load(f) 143 | 144 | # Read the HTML template 145 | if os.path.exists(html_template_path): 146 | with open(html_template_path, 'r', encoding='utf-8') as f: 147 | html_content = f.read() 148 | 149 | # Replace placeholder with formatted repo name from utils 150 | repo_name = get_repo_name() 151 | formatted_repo_name = format_repo_name(repo_name) 152 | html_content = html_content.replace('', formatted_repo_name) 153 | 154 | # Embed the swagger data as a JavaScript variable 155 | if swagger_data: 156 | # Escape the JSON for embedding in JavaScript 157 | swagger_json_str = json.dumps(swagger_data, indent=2) 158 | swagger_json_str = re.sub(r'') 167 | if script_start != -1: 168 | insert_pos = script_start + len('