├── img.png
├── .gitignore
├── security.md
├── rails_pipeline
    ├── __init__.py
    ├── find_api_definition_files.py
    ├── definition_swagger_generator.py
    └── generate_file_information.py
├── nodejs_pipeline
    ├── constants.py
    ├── definition_swagger_generator.py
    ├── find_api_definition_files.py
    ├── generate_file_information.py
    ├── run_swagger_generation.py
    └── identify_api_functions.py
├── requirements.txt
├── .dockerignore
├── python_pipeline
    ├── definition_swagger_generator.py
    ├── find_api_definition_files.py
    ├── identify_api_functions.py
    ├── run_swagger_generation.py
    └── generate_file_information.py
├── framework_identifier.py
├── LICENSE
├── config.py
├── Dockerfile
├── golang_pipeline
    ├── find_api_definition_files.py
    ├── definition_swagger_generator.py
    ├── generate_file_information.py
    ├── identify_api_functions.py
    └── run_swagger_generation.py
├── .github
    └── workflows
    │   ├── README.md
    │   └── docker-build.yml
├── llm_client.py
├── file_scanner.py
├── docker-entrypoint.sh
├── faiss_index_generator.py
├── user_config.py
├── swagger_mcp.py
├── bootstrap_mcp_runner.sh
├── config.yml
├── run.sh
├── endpoints_extractor.py
├── CODE_OF_CONDUCT.md
├── README.md
├── CONTRIBUTING.md
├── utils.py
├── swagger_generation_cli.py
└── swagger_generator.py


/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/qodex-ai/apimesh/HEAD/img.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycahce__/
2 | */__pycahce__/
3 | **/__pycahce__/
4 | .qodexai/
5 | .DS_Store
6 | .idea/
7 | 


--------------------------------------------------------------------------------
/security.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 | 
3 | ## Reporting a Vulnerability
4 | 
5 | If you notice a security vulnerability, please let the team know by [sending an email to support@qodex.ai](mailto:support@qodex.ai).
6 | 


--------------------------------------------------------------------------------
/rails_pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Ruby on Rails specific Swagger generation helpers.
3 | 
4 | The package mirrors the structure and responsibilities of the existing
5 | Node.js and Python generators so that the orchestrator can plug in a
6 | framework-specific implementation with minimal conditional logic.
7 | """
8 | 
9 | 


--------------------------------------------------------------------------------
/nodejs_pipeline/constants.py:
--------------------------------------------------------------------------------
 1 | SUPPORTED_NODE_FILE_EXTENSIONS = (
 2 |     ".js",
 3 |     ".cjs",
 4 |     ".mjs",
 5 |     ".ts",
 6 |     ".tsx",
 7 |     ".cts",
 8 |     ".mts",
 9 | )
10 | 
11 | JAVASCRIPT_FILE_EXTENSIONS = (".js", ".cjs", ".mjs")
12 | TYPESCRIPT_FILE_EXTENSIONS = (".ts", ".cts", ".mts")
13 | TSX_FILE_EXTENSIONS = (".tsx",)
14 | 
15 | METADATA_DIR_NAME = "qodex_file_information"
16 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | langchain==0.3.16
 2 | langchain-community==0.3.16
 3 | langchain-core==0.3.63
 4 | langchain-openai==0.3.5
 5 | langsmith==0.1.139
 6 | openai==1.76.0
 7 | tiktoken==0.8.0
 8 | faiss-cpu==1.9.0.post1
 9 | langchain-text-splitters==0.3.4
10 | pyyaml==6.0.2
11 | numpy<2
12 | tree-sitter==0.25.1
13 | tree-sitter-python==0.23.6
14 | tree-sitter-javascript==0.23.1
15 | tree-sitter-ruby==0.23.1
16 | tree-sitter-go==0.25.0
17 | tree-sitter-typescript==0.23.2
18 | esprima==4.0.1
19 | requests
20 | 


--------------------------------------------------------------------------------
/.dockerignore:
--------------------------------------------------------------------------------
 1 | # Git
 2 | .git
 3 | .gitignore
 4 | .gitattributes
 5 | 
 6 | # Python
 7 | __pycache__
 8 | *.pyc
 9 | *.pyo
10 | *.pyd
11 | .Python
12 | *.so
13 | *.egg
14 | *.egg-info
15 | dist
16 | build
17 | *.whl
18 | qodexai-virtual-env/
19 | venv/
20 | env/
21 | ENV/
22 | 
23 | # IDE
24 | .vscode/
25 | .idea/
26 | *.swp
27 | *.swo
28 | *~
29 | 
30 | # OS
31 | .DS_Store
32 | Thumbs.db
33 | 
34 | # Documentation
35 | *.md
36 | !README.md
37 | 
38 | # Other
39 | *.log
40 | .pytest_cache/
41 | .coverage
42 | htmlcov/
43 | 
44 | # User config (will be generated)
45 | .qodexai/
46 | 
47 | # Generated swagger files (users will generate their own)
48 | swagger.json
49 | *.json
50 | 
51 | 


--------------------------------------------------------------------------------
/python_pipeline/definition_swagger_generator.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from prompts import python_swagger_prompt
 3 | from llm_client import OpenAiClient
 4 | 
 5 | 
 6 | 
 7 | def get_function_definition_swagger(function_definition, context, route):
 8 |     openai_ai_client = OpenAiClient()
 9 |     messages = [{
10 |         "role": "user",
11 |         "content": python_swagger_prompt.format(route = route, function_definition = function_definition, context = context)
12 |     }]
13 |     response = openai_ai_client.call_chat_completion(messages=messages, temperature=1)
14 |     start_index = response.find('{')
15 |     end_index = response.rfind('}')
16 |     swagger_json_block = response[start_index:end_index + 1]
17 |     return json.loads(swagger_json_block)
18 | 


--------------------------------------------------------------------------------
/nodejs_pipeline/definition_swagger_generator.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from prompts import node_js_prompt
 3 | from llm_client import OpenAiClient
 4 | 
 5 | 
 6 | 
 7 | def get_function_definition_swagger(function_definition, context, route):
 8 |     openai_ai_client = OpenAiClient()
 9 |     content = node_js_prompt.format(route = route, function_definition = function_definition, context=context)
10 |     messages = [{
11 |         "role": "user",
12 |         "content": content
13 |     }]
14 |     response = openai_ai_client.call_chat_completion(messages=messages, temperature=1)
15 |     start_index = response.find('{')
16 |     end_index = response.rfind('}')
17 |     swagger_json_block = response[start_index:end_index + 1]
18 |     return json.loads(swagger_json_block)
19 | 


--------------------------------------------------------------------------------
/framework_identifier.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from config import Configurations
 3 | from prompts import framework_identifier_prompt, framework_identifier_system_prompt
 4 | from llm_client import OpenAiClient
 5 | 
 6 | 
 7 | class FrameworkIdentifier:
 8 |     def __init__(self):
 9 |         self.config = Configurations()
10 |         self.openai_client = OpenAiClient()
11 | 
12 | 
13 |     def get_framework(self, file_paths):
14 |         prompt = framework_identifier_prompt.format(file_paths = file_paths, frameworks = str(list(self.config.routing_patters_map.keys())))
15 |         messages = [
16 |             {"role": "system", "content": framework_identifier_system_prompt},
17 |             {"role": "user", "content": prompt}
18 |         ]
19 |         response_content = self.openai_client.call_chat_completion(messages=messages)
20 |         start_index = response_content.find('{')
21 |         end_index = response_content.rfind('}')
22 |         swagger_json_block = response_content[start_index:end_index + 1]
23 |         return json.loads(swagger_json_block)
24 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 qodex-ai
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import yaml
 3 | 
 4 | 
 5 | class Configurations:
 6 |     def __init__(self):
 7 |         # Get config path from environment variable
 8 |         config_path = os.environ.get("APIMESH_CONFIG_PATH")
 9 |         if config_path is None:
10 |             raise ValueError(
11 |                 "APIMESH_CONFIG_PATH environment variable is not set. "
12 |                 "Please set it to the path of your config.yml file."
13 |             )
14 |         
15 |         # Load YAML configurations
16 |         self.config = self._load_config(config_path)
17 | 
18 |         # Assign values from the YAML file
19 |         self.ignored_dirs = set(self.config.get("ignored_dirs", []))
20 |         self.routing_patters_map = self.config.get("routing_patterns_map", {})
21 |         self.gpt_4o_model_name = self.config.get("gpt_4o_model_name", "gpt-4o")
22 | 
23 |     def _load_config(self, config_path):
24 |         """Loads configuration from a YAML file."""
25 |         with open(config_path, "r", encoding="utf-8") as file:
26 |             config = yaml.safe_load(file)
27 |             return config if config is not None else {}
28 | 
29 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.10-slim
 2 | 
 3 | # Set working directory
 4 | WORKDIR /app
 5 | 
 6 | # Install system dependencies
 7 | RUN apt-get update && apt-get install -y \
 8 |     git \
 9 |     && rm -rf /var/lib/apt/lists/*
10 | 
11 | # Copy requirements file
12 | COPY requirements.txt .
13 | 
14 | # Install Python dependencies
15 | RUN pip install --no-cache-dir -r requirements.txt
16 | 
17 | # Copy the entire application
18 | COPY . .
19 | 
20 | # Create a directory for mounted repos (users will mount their repo here)
21 | RUN mkdir -p /workspace
22 | 
23 | # Create entrypoint script
24 | COPY docker-entrypoint.sh /usr/local/bin/
25 | RUN chmod +x /usr/local/bin/docker-entrypoint.sh
26 | 
27 | # Set environment variables for config file paths
28 | ENV APIMESH_CONFIG_PATH=/app/config.yml
29 | ENV APIMESH_USER_CONFIG_PATH=/workspace/apimesh/config.json
30 | ENV APIMESH_USER_REPO_PATH=/workspace
31 | ENV APIMESH_OUTPUT_FILEPATH=/workspace/apimesh/swagger.json
32 | 
33 | # Set the entrypoint
34 | ENTRYPOINT ["docker-entrypoint.sh"]
35 | 
36 | # Default command - run interactively if no arguments provided
37 | # Users can override by passing arguments: docker run ... qodexai/apimesh --help
38 | CMD []
39 | 
40 | 


--------------------------------------------------------------------------------
/golang_pipeline/find_api_definition_files.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import List
 3 | 
 4 | from config import Configurations
 5 | 
 6 | config = Configurations()
 7 | 
 8 | 
 9 | def _is_ignored(path: Path) -> bool:
10 |     return any(part in config.ignored_dirs for part in path.parts)
11 | 
12 | 
13 | def _is_test_file(path: Path) -> bool:
14 |     return path.name.endswith("_test.go")
15 | 
16 | 
17 | def _looks_like_routing_file(path: Path) -> bool:
18 |     """
19 |     Heuristic to bubble up files that are likely to contain router definitions.
20 |     """
21 |     lowered = path.as_posix().lower()
22 |     candidates = (
23 |         "route",
24 |         "router",
25 |         "handler",
26 |         "controller",
27 |         "server",
28 |         "api",
29 |         "http",
30 |     )
31 |     return any(token in lowered for token in candidates)
32 | 
33 | 
34 | def find_go_files(directory: str) -> List[Path]:
35 |     base_path = Path(directory)
36 |     go_files: List[Path] = []
37 |     for file_path in base_path.rglob("*.go"):
38 |         if _is_ignored(file_path) or _is_test_file(file_path):
39 |             continue
40 |         go_files.append(file_path)
41 |     return go_files
42 | 
43 | 
44 | def find_api_definition_files(directory: str) -> List[str]:
45 |     go_files = find_go_files(directory)
46 |     go_files.sort(key=lambda p: (0 if _looks_like_routing_file(p) else 1, str(p)))
47 |     return [str(path) for path in go_files]
48 | 
49 | 


--------------------------------------------------------------------------------
/rails_pipeline/find_api_definition_files.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import List
 3 | 
 4 | from config import Configurations
 5 | 
 6 | config = Configurations()
 7 | 
 8 | 
 9 | def _is_ignored(path: Path) -> bool:
10 |     return any(part in config.ignored_dirs for part in path.parts)
11 | 
12 | 
13 | def _looks_like_controller(path: Path) -> bool:
14 |     if "app" not in path.parts:
15 |         return False
16 |     if "controllers" not in path.parts:
17 |         return False
18 |     return path.name.endswith("_controller.rb")
19 | 
20 | 
21 | def _looks_like_route_file(path: Path) -> bool:
22 |     return path.as_posix().endswith("config/routes.rb")
23 | 
24 | 
25 | def find_ruby_files(directory: str) -> List[Path]:
26 |     directory_path = Path(directory)
27 |     ruby_files: List[Path] = []
28 |     for file_path in directory_path.rglob("*.rb"):
29 |         if not _is_ignored(file_path):
30 |             ruby_files.append(file_path)
31 |     return ruby_files
32 | 
33 | 
34 | def find_api_definition_files(directory: str) -> List[str]:
35 |     ruby_files = find_ruby_files(directory)
36 |     api_files: List[str] = []
37 | 
38 |     for ruby_file in ruby_files:
39 |         if _looks_like_route_file(ruby_file):
40 |             api_files.append(str(ruby_file))
41 |             continue
42 |         if _looks_like_controller(ruby_file):
43 |             api_files.append(str(ruby_file))
44 | 
45 |     api_files.sort(
46 |         key=lambda path: 0 if path.endswith("config/routes.rb") else 1
47 |     )
48 |     return api_files
49 | 


--------------------------------------------------------------------------------
/.github/workflows/README.md:
--------------------------------------------------------------------------------
 1 | # GitHub Actions Workflows
 2 | 
 3 | ## Docker Build Workflow
 4 | 
 5 | This workflow automatically builds and pushes Docker images to Docker Hub when tags are pushed to the repository.
 6 | 
 7 | ### How it works:
 8 | 
 9 | 1. **Trigger**: Automatically runs when you push a tag matching pattern `v*.*.*` (e.g., `v1.0.0`, `v2.1.3`)
10 | 2. **Build**: Builds the Docker image using the Dockerfile
11 | 3. **Tag**: Tags the image with:
12 |    - The full tag name (e.g., `v1.0.0`)
13 |    - `latest` (always updated to the newest tag)
14 | 4. **Push**: Pushes all tags to Docker Hub
15 | 
16 | ### Setup Instructions:
17 | 
18 | 1. **Create a Docker Hub token**:
19 |    - Go to Docker Hub → Account Settings → Security
20 |    - Click "New Access Token"
21 |    - Give it a name (e.g., "github-actions")
22 |    - Copy the token
23 | 
24 | 2. **Add the token to GitHub Secrets**:
25 |    - Go to your GitHub repository → Settings → Secrets and variables → Actions
26 |    - Click "New repository secret"
27 |    - Name: `DOCKER_HUB_TOKEN`
28 |    - Value: Paste your Docker Hub token
29 |    - Click "Add secret"
30 | 
31 | 3. **Create and push a tag**:
32 |    ```bash
33 |    git tag v1.0.0
34 |    git push origin v1.0.0
35 |    ```
36 | 
37 | ### Version Tagging:
38 | 
39 | - Tag format: `v1.0.0`, `v2.1.3`, etc.
40 | - Images will be tagged as:
41 |   - `qodexai/apimesh:v1.0.0` (full tag)
42 |   - `qodexai/apimesh:latest` (always points to newest)
43 | 
44 | ### Manual Trigger:
45 | 
46 | You can also manually trigger the workflow from the Actions tab in GitHub.
47 | 
48 | 


--------------------------------------------------------------------------------
/.github/workflows/docker-build.yml:
--------------------------------------------------------------------------------
 1 | name: Build and Push Docker Image
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - 'v*.*.*'  # Triggers on tags like v1.0.0, v2.1.3, etc.
 7 |   workflow_dispatch:  # Allows manual triggering
 8 | 
 9 | env:
10 |   DOCKER_HUB_USERNAME: qodexai
11 |   IMAGE_NAME: apimesh
12 | 
13 | jobs:
14 |   build-and-push:
15 |     runs-on: ubuntu-latest
16 |     
17 |     steps:
18 |       - name: Checkout code
19 |         uses: actions/checkout@v4
20 | 
21 |       - name: Set up Docker Buildx
22 |         uses: docker/setup-buildx-action@v3
23 | 
24 |       - name: Extract version from tag
25 |         id: tag_version
26 |         run: |
27 |           if [[ "${{ github.ref }}" == refs/tags/* ]]; then
28 |             echo "tag=${{ github.ref_name }}" >> $GITHUB_OUTPUT
29 |           else
30 |             # For manual dispatch, use a default version
31 |             echo "tag=dev" >> $GITHUB_OUTPUT
32 |           fi
33 | 
34 |       - name: Log in to Docker Hub
35 |         uses: docker/login-action@v3
36 |         with:
37 |           username: ${{ env.DOCKER_HUB_USERNAME }}
38 |           password: ${{ secrets.DOCKER_HUB_TOKEN }}
39 | 
40 |       - name: Build and push Docker image
41 |         uses: docker/build-push-action@v5
42 |         with:
43 |           context: .
44 |           push: true
45 |           pull: true
46 |           tags: |
47 |             ${{ env.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_NAME }}:${{ steps.tag_version.outputs.tag }}
48 |             ${{ env.DOCKER_HUB_USERNAME }}/${{ env.IMAGE_NAME }}:latest
49 |           cache-from: type=gha
50 |           cache-to: type=gha,mode=max
51 |           platforms: linux/amd64,linux/arm64
52 | 
53 |       - name: Image digest
54 |         run: |
55 |           echo "Image pushed with tags:"
56 |           echo "  - ${{ steps.tag_version.outputs.tag }}"
57 |           echo "  - latest"
58 | 
59 | 


--------------------------------------------------------------------------------
/llm_client.py:
--------------------------------------------------------------------------------
 1 | from openai import OpenAI
 2 | from langchain_openai import OpenAIEmbeddings
 3 | from config import Configurations
 4 | import json, os
 5 | 
 6 | config = Configurations()
 7 | 
 8 | class OpenAiClient:
 9 |     def __init__(self):
10 |         self.openai_api_key = self.load_openai_api_key()
11 |         self.client = OpenAI(
12 |             api_key=self.openai_api_key)
13 |         self.embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=self.openai_api_key)
14 | 
15 |     def call_chat_completion(self, messages, temperature=0.5):
16 |         model = self.load_openai_model()
17 |         # The Responses API is required for Codex models (chat.completions is unsupported).
18 |         effective_temperature = 1 if model.startswith("gpt-5") else temperature
19 |         response = self.client.responses.create(
20 |             model=model,
21 |             input=messages,
22 |             temperature=effective_temperature,
23 |         )
24 |         return response.output_text
25 | 
26 |     @staticmethod
27 |     def load_openai_api_key():
28 |         config_file = os.environ.get("APIMESH_USER_CONFIG_PATH")
29 |         if config_file is None:
30 |             raise ValueError(
31 |                 "APIMESH_USER_CONFIG_PATH environment variable is not set. "
32 |                 "Please set it to the path of your config.json file."
33 |             )
34 |         with open(config_file, "r") as file:
35 |             user_config_data = json.load(file)
36 |         return user_config_data['openai_api_key']
37 | 
38 |     def load_openai_model(self):
39 |         config_file = os.environ.get("APIMESH_USER_CONFIG_PATH")
40 |         if config_file is None:
41 |             raise ValueError(
42 |                 "APIMESH_USER_CONFIG_PATH environment variable is not set. "
43 |                 "Please set it to the path of your config.json file."
44 |             )
45 |         with open(config_file, "r") as file:
46 |             user_config_data = json.load(file)
47 |         return user_config_data['openai_model']
48 | 


--------------------------------------------------------------------------------
/nodejs_pipeline/find_api_definition_files.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from pathlib import Path
 3 | from config import Configurations
 4 | from nodejs_pipeline.constants import SUPPORTED_NODE_FILE_EXTENSIONS
 5 | 
 6 | config = Configurations()
 7 | 
 8 | API_DECORATOR_NAMES = {
 9 |     'route', 'get', 'post', 'put', 'delete', 'patch', 'options', 'head', 'all',
10 |     'api', 'endpoint', 'router', 'controller', 'module', 'middleware', 'rest'
11 | }
12 | 
13 | HTTP_METHODS = ['get', 'post', 'put', 'delete', 'patch', 'options', 'head']
14 | ROUTE_OBJECT_PREFIXES = ['app', 'router', 'route', 'api', 'controller', 'server']
15 | ROUTE_OBJECT_SUFFIXES = ['Router', 'Routes', 'Api', 'Controller', 'App', 'Server']
16 | 
17 | route_prefix_pattern = r'(?:' + '|'.join(ROUTE_OBJECT_PREFIXES) + r')'
18 | route_suffix_pattern = r'(?:[A-Za-z_$][\w$]*?(?:' + '|'.join(ROUTE_OBJECT_SUFFIXES) + r'))'
19 | route_object_pattern = r'(?:' + route_prefix_pattern + r'|' + route_suffix_pattern + r')'
20 | 
21 | # Regex patterns to detect API routes or decorators
22 | ROUTE_METHOD_PATTERN = re.compile(
23 |     r'\b' + route_object_pattern + r'\s*\.\s*(?:' + '|'.join(HTTP_METHODS) + r')\s*\(',
24 |     re.IGNORECASE
25 | )
26 | 
27 | DECORATOR_PATTERN = re.compile(
28 |     r'@\s*(' + '|'.join(API_DECORATOR_NAMES) + r')\b',
29 |     re.IGNORECASE
30 | )
31 | 
32 | def find_node_files(directory):
33 |     directory = Path(directory)
34 |     node_files = []
35 |     for file in directory.rglob('*'):
36 |         if file.suffix and file.suffix.lower() in SUPPORTED_NODE_FILE_EXTENSIONS:
37 |             if not any(part in config.ignored_dirs for part in file.parts):
38 |                 node_files.append(file)
39 |     return node_files
40 | 
41 | def file_contains_api_defs(file_path):
42 |     try:
43 |         text = file_path.read_text(encoding='utf-8')
44 |     except Exception:
45 |         return False
46 | 
47 |     if ROUTE_METHOD_PATTERN.search(text):
48 |         return True
49 | 
50 |     if DECORATOR_PATTERN.search(text):
51 |         return True
52 | 
53 |     return False
54 | 
55 | def find_api_definition_files(directory):
56 |     node_files = find_node_files(directory)
57 |     api_files = []
58 |     for node_file in node_files:
59 |         if file_contains_api_defs(node_file):
60 |             api_files.append(str(node_file))
61 |     return api_files
62 | 


--------------------------------------------------------------------------------
/file_scanner.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import List
 3 | from config import Configurations
 4 | from utils import get_repo_path
 5 | import re
 6 | 
 7 | config = Configurations()
 8 | class FileScanner:
 9 | 
10 |     def __init__(self):
11 |         pass
12 | 
13 |     def get_all_file_paths(self) -> List[str|bytes]:
14 |         """
15 |         Get all file paths in the repository, ignoring specified directories
16 |         """
17 |         repo_path = get_repo_path()
18 |         file_paths = []
19 |         supported_extensions = ('.py', '.js', '.ts', '.java', '.rb', '.go')
20 | 
21 |         for root, dirs, files in os.walk(repo_path):
22 |             dirs[:] = [d for d in dirs if d not in config.ignored_dirs]
23 | 
24 |             if not self.should_process_directory(root):
25 |                 continue
26 | 
27 |             for file in files:
28 |                 if file.endswith(supported_extensions):
29 |                     file_path = os.path.join(root, file)
30 |                     file_paths.append(file_path)
31 |         return file_paths
32 | 
33 |     @staticmethod
34 |     def find_api_files(file_paths, framework):
35 |         patterns = config.routing_patters_map.get(framework)
36 |         if not patterns:
37 |             print(f"Warning: No routing patterns configured for framework '{framework or 'unknown'}'. Scanning all supported files.")
38 |             return list(file_paths)
39 |         api_files = []
40 |         for file_path in file_paths:
41 |             try:
42 |                 with open(file_path, 'r', encoding='utf-8') as file:
43 |                     content = file.read()
44 |                     if any(re.search(pattern, content) for pattern in patterns):
45 |                         if framework == "ruby_on_rails":
46 |                             if file_path.endswith('.rb'):
47 |                                 api_files.append(file_path)
48 |                         else:
49 |                             api_files.append(file_path)
50 |             except (UnicodeDecodeError, FileNotFoundError):
51 |                 continue
52 |         return api_files
53 | 
54 |     @staticmethod
55 |     def should_process_directory(dir_path: str) -> bool:
56 |         """
57 |         Check if a directory should be processed or ignored
58 |         """
59 |         path_parts = dir_path.split(os.sep)
60 |         return not any(part in config.ignored_dirs for part in path_parts)
61 | 


--------------------------------------------------------------------------------
/golang_pipeline/definition_swagger_generator.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | from typing import List, Optional
 3 | 
 4 | from llm_client import OpenAiClient
 5 | from prompts import (
 6 |     golang_swagger_generation_prompt,
 7 |     swagger_generation_system_prompt,
 8 | )
 9 | 
10 | 
11 | def _extract_json_block(raw_text: str) -> Optional[str]:
12 |     if not raw_text:
13 |         return None
14 |     start = raw_text.find("{")
15 |     end = raw_text.rfind("}")
16 |     if start == -1 or end == -1 or end <= start:
17 |         return None
18 |     return raw_text[start : end + 1]
19 | 
20 | 
21 | def _cleanup_swagger_payload(payload: dict) -> dict:
22 |     paths = payload.get("paths", {})
23 |     for path_data in paths.values():
24 |         for method_data in path_data.values():
25 |             auth_tag = method_data.get("auth_tag")
26 |             if auth_tag is None or str(auth_tag).strip() == "":
27 |                 method_data.pop("auth_tag", None)
28 |     return payload
29 | 
30 | 
31 | def get_function_definition_swagger(
32 |     function_definition: List[str],
33 |     context: List[List[str]],
34 |     route: str,
35 |     http_method: Optional[str] = None,
36 | ) -> dict:
37 |     client = OpenAiClient()
38 |     function_text = "".join(function_definition)
39 |     context_text = "\n\n".join("".join(block) for block in context) if context else ""
40 | 
41 |     prompt = golang_swagger_generation_prompt.format(
42 |         endpoint_method=http_method or "GET",
43 |         endpoint_path=route,
44 |         endpoint_method_lower=(http_method or "GET").lower(),
45 |         endpoint_info=function_text,
46 |         authentication_information=context_text,
47 |     )
48 | 
49 |     messages = [
50 |         {"role": "system", "content": swagger_generation_system_prompt},
51 |         {"role": "user", "content": prompt},
52 |     ]
53 | 
54 |     last_error: Optional[Exception] = None
55 |     for _ in range(3):
56 |         response = client.call_chat_completion(messages=messages, temperature=0)
57 |         payload = _extract_json_block(response)
58 |         if not payload:
59 |             last_error = ValueError("LLM response was missing JSON payload.")
60 |             continue
61 |         try:
62 |             return _cleanup_swagger_payload(json.loads(payload))
63 |         except json.JSONDecodeError as exc:
64 |             last_error = exc
65 |     raise ValueError("Unable to parse Swagger JSON response.") from last_error
66 | 


--------------------------------------------------------------------------------
/python_pipeline/find_api_definition_files.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | import ast
 3 | from config import Configurations
 4 | 
 5 | config = Configurations()
 6 | 
 7 | API_DECORATOR_NAMES = {
 8 |     'route', 'get', 'post', 'put', 'delete', 'patch',
 9 |     'api', 'endpoint', 'router', 'viewset', 'view'
10 | }
11 | def find_python_files(directory):
12 |     directory = Path(directory)
13 |     python_files = []
14 |     for py_file in directory.rglob('*.py'):
15 |         # Check if any parent directory is in IGNORE_DIRS
16 |         if not any(part in config.ignored_dirs for part in py_file.parts):
17 |             python_files.append(py_file)
18 |     return python_files
19 | 
20 | def has_api_decorator(decorator_node):
21 |     if isinstance(decorator_node, ast.Call) and hasattr(decorator_node.func, 'attr'):
22 |         if decorator_node.func.attr.lower() in API_DECORATOR_NAMES:
23 |             return True
24 |     if isinstance(decorator_node, ast.Attribute):
25 |         if decorator_node.attr.lower() in API_DECORATOR_NAMES:
26 |             return True
27 |     if isinstance(decorator_node, ast.Name):
28 |         if decorator_node.id.lower() in API_DECORATOR_NAMES:
29 |             return True
30 |     return False
31 | 
32 | def file_contains_api_defs(file_path):
33 |     try:
34 |         source = file_path.read_text(encoding='utf-8')
35 |         tree = ast.parse(source, filename=str(file_path))
36 |     except Exception:
37 |         return False
38 |     for node in ast.walk(tree):
39 |         if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
40 |             for decorator in node.decorator_list:
41 |                 if has_api_decorator(decorator):
42 |                     return True
43 |         if isinstance(node, ast.ClassDef):
44 |             for decorator in node.decorator_list:
45 |                 if has_api_decorator(decorator):
46 |                     return True
47 |             for base in node.bases:
48 |                 if isinstance(base, ast.Name) and base.id.lower() in API_DECORATOR_NAMES:
49 |                     return True
50 |                 if isinstance(base, ast.Attribute) and base.attr.lower() in API_DECORATOR_NAMES:
51 |                     return True
52 |     return False
53 | 
54 | def find_api_definition_files(directory):
55 |     py_files = find_python_files(directory)
56 |     api_files = []
57 |     for py_file in py_files:
58 |         if file_contains_api_defs(py_file):
59 |             api_files.append(str(py_file))
60 |     return api_files
61 | 
62 | # directory = Path('/Users/ankits/PycharmProjects/data-science-model-serving')
63 | # api_files = find_api_definition_files(directory)
64 | # print(api_files)
65 | 


--------------------------------------------------------------------------------
/docker-entrypoint.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e
 3 | 
 4 | # Default values
 5 | PROJECT_API_KEY="${PROJECT_API_KEY:-null}"
 6 | OPENAI_API_KEY="${OPENAI_API_KEY:-null}"
 7 | AI_CHAT_ID="${AI_CHAT_ID:-null}"
 8 | 
 9 | # Parse command line arguments
10 | while [[ $# -gt 0 ]]; do
11 |   case "$1" in
12 |     --project-api-key)
13 |       PROJECT_API_KEY="$2"
14 |       shift 2
15 |       ;;
16 |     --openai-api-key)
17 |       OPENAI_API_KEY="$2"
18 |       shift 2
19 |       ;;
20 |     --ai-chat-id)
21 |       AI_CHAT_ID="$2"
22 |       shift 2
23 |       ;;
24 |     --help)
25 |       echo "Swagger Generator Docker Image"
26 |       echo ""
27 |       echo "Usage (run from your repository directory):"
28 |       echo ""
29 |       echo "  # Interactive mode - prompts for missing inputs:"
30 |       echo "  cd /path/to/your/repo"
31 |       echo "  docker run --pull always -it --rm -v \$(pwd):/workspace qodexai/apimesh"
32 |       echo ""
33 |       echo "  # With environment variables:"
34 |       echo "  cd /path/to/your/repo"
35 |       echo "  docker run --pull always--rm -v \$(pwd):/workspace \\"
36 |       echo "    -e OPENAI_API_KEY=your_key \\"
37 |       echo "    -e PROJECT_API_KEY=your_key \\"
38 |       echo "    -e AI_CHAT_ID=your_chat_id \\"
39 |       echo "    qodexai/apimesh"
40 |       echo ""
41 |       echo "  # With command-line arguments:"
42 |       echo "  cd /path/to/your/repo"
43 |       echo "  docker run --pull always --rm -v \$(pwd):/workspace \\"
44 |       echo "    qodexai/apimesh \\"
45 |       echo "    --openai-api-key your_key"
46 |       echo ""
47 |       echo "Environment Variables (all optional - will prompt if not provided):"
48 |       echo "  OPENAI_API_KEY      - Your OpenAI API key"
49 |       echo "  PROJECT_API_KEY     - Your project API key"
50 |       echo "  AI_CHAT_ID          - Target AI chat ID"
51 |       echo ""
52 |       echo "Arguments (all optional - will prompt if not provided):"
53 |       echo "  --project-api-key   - Override PROJECT_API_KEY env var"
54 |       echo "  --openai-api-key    - Override OPENAI_API_KEY env var"
55 |       echo "  --ai-chat-id        - Override AI_CHAT_ID env var"
56 |       echo ""
57 |       echo "Note: Always run docker commands from your repository directory. Use -it flags for interactive mode."
58 |       exit 0
59 |       ;;
60 |     *)
61 |       echo "Unknown option: $1"
62 |       echo "Use --help for usage information"
63 |       exit 1
64 |       ;;
65 |   esac
66 | done
67 | 
68 | # Normalize values - pass empty string if null so Python script can prompt
69 | if [ "$PROJECT_API_KEY" == "null" ] || [ -z "$PROJECT_API_KEY" ]; then
70 |   PROJECT_API_KEY=""
71 | fi
72 | 
73 | if [ "$OPENAI_API_KEY" == "null" ] || [ -z "$OPENAI_API_KEY" ]; then
74 |   OPENAI_API_KEY=""
75 | fi
76 | 
77 | if [ "$AI_CHAT_ID" == "null" ] || [ -z "$AI_CHAT_ID" ]; then
78 |   AI_CHAT_ID=""
79 | fi
80 | 
81 | # Run the swagger generation
82 | # The Python script will prompt for any missing values
83 | cd /app
84 | export PYTHONPATH=/app:$PYTHONPATH
85 | 
86 | python3 swagger_generation_cli.py "$OPENAI_API_KEY" "$PROJECT_API_KEY" "$AI_CHAT_ID"


--------------------------------------------------------------------------------
/rails_pipeline/definition_swagger_generator.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import re
 3 | from typing import List, Optional
 4 | 
 5 | from llm_client import OpenAiClient
 6 | from prompts import ruby_on_rails_swagger_generation_prompt
 7 | 
 8 | 
 9 | _SYSTEM_PROMPT = (
10 |     "You are a meticulous API documentation assistant. "
11 |     "Respond with a single valid JSON object that matches the requested schema. "
12 |     "Do not include any surrounding prose, markdown, or code fences."
13 | )
14 | 
15 | 
16 | def _extract_json_block(raw_text: str) -> Optional[str]:
17 |     """
18 |     Extract the JSON payload from a raw LLM response, handling code fences and
19 |     other wrapping text the model might emit.
20 |     """
21 |     if not raw_text:
22 |         return None
23 | 
24 |     fence_match = re.search(
25 |         r"```(?:json)?\s*(\{[\s\S]*\})\s*```", raw_text, flags=re.IGNORECASE
26 |     )
27 |     if fence_match:
28 |         return fence_match.group(1).strip()
29 | 
30 |     start_index = raw_text.find("{")
31 |     end_index = raw_text.rfind("}")
32 |     if start_index == -1 or end_index == -1 or end_index <= start_index:
33 |         return None
34 |     return raw_text[start_index : end_index + 1].strip()
35 | 
36 | 
37 | def get_function_definition_swagger(
38 |     function_definition: List[str],
39 |     context: List[List[str]],
40 |     route: str,
41 |     http_method: Optional[str] = None,
42 | ) -> dict:
43 |     """
44 |     Delegate the heavy lifting of producing a Swagger snippet for a single
45 |     Rails endpoint to the LLM, mirroring the behaviour of the Node and Python
46 |     generators.
47 |     """
48 |     openai_ai_client = OpenAiClient()
49 |     function_definition_text = "".join(function_definition)
50 |     context_text = "\n\n".join("".join(block) for block in context) if context else ""
51 |     endpoint_info_text = (
52 |         f"{function_definition_text}\n\n{context_text}"
53 |         if context_text
54 |         else function_definition_text
55 |     )
56 | 
57 |     prompt = ruby_on_rails_swagger_generation_prompt.format(
58 |         endpoint_info=endpoint_info_text,
59 |         endpoint_method=http_method or "GET",
60 |         endpoint_path=route,
61 |         authentication_information=context_text,
62 |     )
63 | 
64 |     messages = [
65 |         {"role": "system", "content": _SYSTEM_PROMPT},
66 |         {"role": "user", "content": prompt},
67 |     ]
68 | 
69 |     last_error: Optional[Exception] = None
70 |     for _ in range(3):
71 |         response = openai_ai_client.call_chat_completion(
72 |             messages=messages, temperature=0
73 |         )
74 |         swagger_json_block = _extract_json_block(response)
75 |         if not swagger_json_block:
76 |             last_error = ValueError("LLM response did not contain JSON payload.")
77 |             continue
78 |         try:
79 |             return json.loads(swagger_json_block)
80 |         except json.JSONDecodeError as exc:
81 |             last_error = exc
82 |             continue
83 | 
84 |     error_message = (
85 |         "Failed to parse Swagger JSON from LLM response after multiple attempts."
86 |     )
87 |     if last_error:
88 |         raise ValueError(error_message) from last_error
89 |     raise ValueError(error_message)
90 | 


--------------------------------------------------------------------------------
/faiss_index_generator.py:
--------------------------------------------------------------------------------
 1 | from langchain_text_splitters import RecursiveCharacterTextSplitter, Language
 2 | from langchain.vectorstores import FAISS
 3 | from llm_client import OpenAiClient
 4 | from utils import num_tokens_from_string
 5 | 
 6 | 
 7 | class GenerateFaissIndex:
 8 |     def __init__(self):
 9 |         self.openai_client = OpenAiClient()
10 | 
11 |     def create_faiss_index(self, file_paths, framework):
12 |         if framework == "ruby_on_rails":
13 |             text_splitter = RecursiveCharacterTextSplitter.from_language(
14 |                 chunk_size=2000,
15 |                 chunk_overlap=200, language=Language.RUBY
16 |             )
17 |         elif framework == "express":
18 |             text_splitter = RecursiveCharacterTextSplitter.from_language(
19 |                 chunk_size=2000,
20 |                 chunk_overlap=200, language=Language.JS
21 |             )
22 |         elif framework == "django" or framework == "flask" or framework == "fastapi":
23 |             text_splitter = RecursiveCharacterTextSplitter.from_language(
24 |                 chunk_size=2000,
25 |                 chunk_overlap=200, language=Language.PYTHON
26 |             )
27 |         elif framework == "golang":
28 |             text_splitter = RecursiveCharacterTextSplitter.from_language(
29 |                 chunk_size=2000,
30 |                 chunk_overlap=200, language=Language.GO
31 |             )
32 |         else:
33 |             text_splitter = RecursiveCharacterTextSplitter(
34 |                 chunk_size=2000,
35 |                 chunk_overlap=200
36 |             )
37 |         texts = []
38 |         metadata = []
39 | 
40 |         for file in file_paths:
41 |             with open(file, 'r', encoding='utf-8') as file:
42 |                 file_content = file.read()
43 |             chunks = text_splitter.split_text(file_content)
44 |             texts.extend(chunks)
45 |             metadata.extend([{'file_path': str(file)}] * len(chunks))
46 |         all_indices = []
47 |         batch = []
48 |         batch_meta = []
49 |         batch_token_count = 0
50 | 
51 |         for text, meta in zip(texts, metadata):
52 |             tokens = num_tokens_from_string(text)
53 | 
54 |             # Start new batch if adding this text exceeds token limit
55 |             if batch_token_count + tokens > 290000:
56 |                 index = FAISS.from_texts(batch, self.openai_client.embeddings, metadatas=batch_meta)
57 |                 all_indices.append(index)
58 |                 batch, batch_meta, batch_token_count = [], [], 0
59 | 
60 |             batch.append(text)
61 |             batch_meta.append(meta)
62 |             batch_token_count += tokens
63 | 
64 |         # Final batch
65 |         if batch:
66 |             index = FAISS.from_texts(batch, self.openai_client.embeddings, metadatas=batch_meta)
67 |             all_indices.append(index)
68 | 
69 |         # Merge all indices
70 |         final_index = all_indices[0]
71 |         for idx in all_indices[1:]:
72 |             final_index.merge_from(idx)
73 |         return FAISS.from_texts(texts, self.openai_client.embeddings, metadatas=metadata)
74 | 
75 |     @staticmethod
76 |     def get_authentication_related_information(faiss_vector_db):
77 |         query = "function to handle authentication information and authorization information"
78 |         docs = faiss_vector_db.similarity_search(str(query), k=4)
79 |         content_list = [doc.page_content.strip() for doc in docs]
80 |         return content_list
81 | 


--------------------------------------------------------------------------------
/user_config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Utility helpers for capturing and persisting user-specific configuration
 3 | for the Swagger Generator CLI.
 4 | """
 5 | 
 6 | import os, json
 7 | from config import Configurations
 8 | from utils import get_repo_name, get_repo_path
 9 | configurations = Configurations()
10 | 
11 | # Get JSON config file path from environment variable
12 | config_file = os.environ.get("APIMESH_USER_CONFIG_PATH")
13 | if config_file is None:
14 |     raise ValueError(
15 |         "APIMESH_USER_CONFIG_PATH environment variable is not set. "
16 |         "Please set it to the path of your config.json file."
17 |     )
18 | 
19 | # Ensure the directory exists
20 | config_dir = os.path.dirname(config_file)
21 | os.makedirs(config_dir, exist_ok=True)
22 | 
23 | class UserConfigurations:
24 |     def __init__(self, project_api_key, openai_api_key, ai_chat_id, is_mcp):
25 |         self.is_mcp = is_mcp
26 |         self.ai_chat_id = ai_chat_id
27 |         self.add_user_configs(project_api_key, openai_api_key)
28 | 
29 |     @staticmethod
30 |     def load_user_config():
31 |         if os.path.exists(config_file):
32 |             with open(config_file, "r") as file:
33 |                 return json.load(file)
34 |         return {}
35 | 
36 |     @staticmethod
37 |     def save_user_config(config):
38 |         with open(config_file, "w") as file:
39 |             json.dump(config, file, indent=4)
40 | 
41 |     @staticmethod
42 |     def _sanitize_cli_value(value):
43 |         if value is None:
44 |             return ""
45 |         if isinstance(value, str):
46 |             cleaned_value = value.strip()
47 |         else:
48 |             cleaned_value = str(value).strip()
49 |         return cleaned_value if cleaned_value and cleaned_value.lower() != "null" else ""
50 | 
51 |     @staticmethod
52 |     def _print_section_header(title):
53 |         line = "=" * max(len(title) + 10, 50)
54 |         print(f"\n{line}\n{title}\n{line}")
55 | 
56 |     def add_user_configs(self, project_api_key, openai_api_key):
57 |         user_config = self.load_user_config()
58 |         self._print_section_header("OpenAI Credentials")
59 |         stored_openai_api_key = user_config.get("openai_api_key", "")
60 |         sanitized_openai_api_key = self._sanitize_cli_value(openai_api_key)
61 |         if sanitized_openai_api_key:
62 |             resolved_openai_api_key = sanitized_openai_api_key
63 |         elif not stored_openai_api_key and not self.is_mcp:
64 |             resolved_openai_api_key = input(
65 |                 f"Please enter openai api key (default: {stored_openai_api_key}): ") or stored_openai_api_key
66 |         else:
67 |             resolved_openai_api_key = stored_openai_api_key
68 |         user_config["openai_api_key"] = resolved_openai_api_key
69 |         self.save_user_config(user_config)
70 |         print(f"  ✓ API Key: {resolved_openai_api_key}")
71 | 
72 |         self._print_section_header("Model Selection")
73 |         default_openai_model = user_config.get("openai_model", "gpt-5.1-codex")
74 |         openai_model = default_openai_model
75 |         user_config["openai_model"] = openai_model
76 |         self.save_user_config(user_config)
77 |         print(f"  ✓ AI Model: {openai_model}")
78 | 
79 |         self._print_section_header("API Host Configuration")
80 |         default_api_host = user_config.get("api_host", "https://api.example.com")
81 |         api_host = default_api_host
82 |         user_config["api_host"] = api_host
83 |         self.save_user_config(user_config)
84 |         print(f"  ✓ API Host: {api_host}")
85 |         # Check if the user entered something
86 |         if not api_host.strip():
87 |             print("  ✗ No api host provided. Exiting...")
88 |             exit(1)
89 | 


--------------------------------------------------------------------------------
/swagger_mcp.py:
--------------------------------------------------------------------------------
  1 | from mcp.server.fastmcp import FastMCP
  2 | from typing import Optional
  3 | import os, subprocess, shutil, sys
  4 | 
  5 | APP_NAME = "SwaggerGenerator MCP"
  6 | DEFAULT_WORK_DIR = os.path.dirname(os.path.abspath(__file__))
  7 | DEFAULT_SCRIPT_URL = "https://raw.githubusercontent.com/qodex-ai/apimesh/main/bootstrap_mcp_runner.sh"
  8 | 
  9 | mcp = FastMCP(APP_NAME)
 10 | 
 11 | def _require(name: str, val: Optional[str]):
 12 |     if not val or str(val).strip().lower() == "null":
 13 |         raise ValueError(f"Missing required parameter: {name}")
 14 | 
 15 | def _need(cmd: str):
 16 |     if shutil.which(cmd) is None:
 17 |         raise RuntimeError(f"Missing dependency: {cmd} is not on PATH")
 18 | 
 19 | def _ensure_dir(p: str):
 20 |     os.makedirs(p, exist_ok=True)
 21 | 
 22 | @mcp.tool()
 23 | def run_swagger_generation(
 24 |     openai_api_key: str,
 25 |     repo_path: str,
 26 |     timeout_seconds: int = 900
 27 | ) -> dict:
 28 |     """
 29 |     This tool takes the path of the repository, openai_api_key and timeout to generate a openapi spec swagger json for that repo.
 30 |     """
 31 |     _require("openai_api_key", openai_api_key)
 32 |     _require("repo_path", repo_path)
 33 | 
 34 |     for dep in ("bash", "curl", "git", "python3", "pip3"):
 35 |         _need(dep)
 36 | 
 37 |     base_dir = DEFAULT_WORK_DIR
 38 |     _ensure_dir(base_dir)
 39 | 
 40 |     repo_path = os.path.abspath(os.path.expanduser(repo_path))
 41 |     if not os.path.isdir(repo_path):
 42 |         raise ValueError(f"repo_path is not a directory: {repo_path}")
 43 | 
 44 |     # --- fetch script (be sure it's a STRING, not a tuple) ---
 45 |     script_url = DEFAULT_SCRIPT_URL  # <-- no trailing comma
 46 |     script_path = os.path.join(base_dir, "bootstrap_mcp_runner.sh")  # <-- no trailing comma
 47 | 
 48 |     # debug types to Claude's log
 49 |     print(f"[mcp] base_dir={base_dir!r} ({type(base_dir)})", file=sys.stderr)
 50 |     print(f"[mcp] repo_path={repo_path!r} ({type(repo_path)})", file=sys.stderr)
 51 |     print(f"[mcp] script_url={script_url!r} ({type(script_url)})", file=sys.stderr)
 52 |     print(f"[mcp] script_path={script_path!r} ({type(script_path)})", file=sys.stderr)
 53 | 
 54 |     curl = subprocess.run(
 55 |         ["curl", "-sSL", script_url, "-o", script_path],
 56 |         capture_output=True, text=True
 57 |     )
 58 |     if curl.returncode != 0:
 59 |         raise RuntimeError(f"curl failed ({curl.returncode}): {curl.stderr or curl.stdout}")
 60 | 
 61 |     chmod = subprocess.run(["chmod", "+x", script_path], capture_output=True, text=True)
 62 |     if chmod.returncode != 0:
 63 |         raise RuntimeError(f"chmod failed ({chmod.returncode}): {chmod.stderr or chmod.stdout}")
 64 | 
 65 |     # --- env for the script ---
 66 |     env = os.environ.copy()
 67 |     env.update({
 68 |         "OPENAI_API_KEY": openai_api_key,
 69 |         "SWAGGER_BOT_REPO_PATH": repo_path,
 70 |         "WORK_DIR": base_dir,
 71 |     })
 72 | 
 73 |     # --- command (ALL ARGS AS STRINGS) ---
 74 |     cmd = [
 75 |         "bash", script_path,
 76 |         "--repo-path", repo_path,
 77 |         "--openai-api-key", openai_api_key,
 78 |         "--project-api-key", "null",
 79 |         "--ai-chat-id", "null",
 80 |         "--is-mcp", "true",
 81 |     ]
 82 |     print(f"[mcp] running: {cmd} (cwd={base_dir})", file=sys.stderr)
 83 | 
 84 |     proc = subprocess.run(
 85 |         cmd,
 86 |         cwd=base_dir,
 87 |         env=env,
 88 |         capture_output=True,
 89 |         text=True,
 90 |         timeout=timeout_seconds,
 91 |     )
 92 |     os.remove(script_path)
 93 | 
 94 |     result = {
 95 |         "exit_code": proc.returncode,
 96 |         "work_dir": base_dir,
 97 |         "stdout": proc.stdout[-200_000:],
 98 |         "stderr": proc.stderr[-200_000:],
 99 |     }
100 |     return result
101 | 
102 | if __name__ == "__main__":
103 |     print("[mcp] server booted; waiting on stdio", file=sys.stderr)
104 |     mcp.run()
105 | 


--------------------------------------------------------------------------------
/bootstrap_mcp_runner.sh:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | set -euo pipefail
  3 | 
  4 | SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  5 | 
  6 | REPO_URL="${REPO_URL:-https://github.com/qodex-ai/apimesh.git}"
  7 | BRANCH_NAME="${BRANCH_NAME:-main}"
  8 | REPO_DIR=""
  9 | 
 10 | PROJECT_API_KEY="null"
 11 | OPENAI_API_KEY="null"
 12 | AI_CHAT_ID="null"
 13 | REPO_PATH="$SCRIPT_DIR"
 14 | APIMESH_DIR=""
 15 | VENV_DIR=""
 16 | CLONE_DIR=""
 17 | 
 18 | need() { command -v "$1" >/dev/null 2>&1 || { echo "Missing dependency: $1" >&2; exit 2; }; }
 19 | need bash; need git; need curl; need python3; need pip3
 20 | 
 21 | while [[ $# -gt 0 ]]; do
 22 |   case "$1" in
 23 |     --project-api-key) PROJECT_API_KEY="${2:-null}"; shift 2 ;;
 24 |     --openai-api-key)  OPENAI_API_KEY="${2:-null}";  shift 2 ;;
 25 |     --ai-chat-id)      AI_CHAT_ID="${2:-null}";      shift 2 ;;
 26 |     --repo-path)       REPO_PATH="${2:-$REPO_PATH}"; shift 2 ;;
 27 |     *) echo "Ignoring unknown arg: $1"; shift ;;
 28 |   esac
 29 | done
 30 | 
 31 | if [[ ! -d "$REPO_PATH" ]]; then
 32 |   echo "Provided --repo-path '$REPO_PATH' is not a directory" >&2
 33 |   exit 3
 34 | fi
 35 | 
 36 | REPO_PATH="$(cd "$REPO_PATH" && pwd)"
 37 | APIMESH_DIR="$REPO_PATH/apimesh"
 38 | VENV_DIR="$APIMESH_DIR/qodexai-virtual-env"
 39 | CLONE_DIR="$APIMESH_DIR/apimesh"
 40 | 
 41 | cleanup() {
 42 |   local exit_code=$?
 43 |   trap - EXIT
 44 |   cd "$SCRIPT_DIR"
 45 | 
 46 |   if [[ -n "${VIRTUAL_ENV:-}" ]]; then
 47 |     deactivate >/dev/null 2>&1 || true
 48 |   fi
 49 | 
 50 |   if [[ -d "$CLONE_DIR" ]]; then
 51 |     echo "Removing cloned repository at '$CLONE_DIR'"
 52 |     rm -rf "$CLONE_DIR"
 53 |   fi
 54 | 
 55 |   if [[ -d "$VENV_DIR" ]]; then
 56 |     echo "Removing virtual environment at '$VENV_DIR'"
 57 |     rm -rf "$VENV_DIR"
 58 |   fi
 59 | 
 60 |   exit "$exit_code"
 61 | }
 62 | 
 63 | trap cleanup EXIT
 64 | 
 65 | mkdir -p "$APIMESH_DIR"
 66 | 
 67 | if [[ -d "$VENV_DIR" ]]; then
 68 |   echo "Virtual environment already exists at '$VENV_DIR'. Removing it."
 69 |   rm -rf "$VENV_DIR"
 70 | fi
 71 | 
 72 | echo "Creating Python venv at $VENV_DIR"
 73 | python3 -m venv "$VENV_DIR"
 74 | source "$VENV_DIR/bin/activate"
 75 | 
 76 | pip3 install --upgrade pip
 77 | pip3 install \
 78 |   "langchain==0.3.16" \
 79 |   "langchain-community==0.3.16" \
 80 |   "langchain-core==0.3.63" \
 81 |   "langchain-openai==0.3.5" \
 82 |   "langsmith==0.1.139" \
 83 |   "openai==1.76.0" \
 84 |   "numpy<2" \
 85 |   "tiktoken==0.8.0" \
 86 |   "faiss-cpu==1.9.0.post1" \
 87 |   "langchain-text-splitters==0.3.4" \
 88 |   "pyyaml==6.0.2" \
 89 |   "tree-sitter==0.25.1" \
 90 |   "tree-sitter-python==0.23.6" \
 91 |   "tree-sitter-javascript==0.23.1" \
 92 |   "tree-sitter-ruby==0.23.1" \
 93 |   "tree-sitter-go==0.25.0" \
 94 |   "tree-sitter-typescript==0.23.2" \
 95 |   "esprima==4.0.1" \
 96 |   "requests"
 97 | 
 98 | # --- repo setup (clone/update specific branch) ---
 99 | if [[ -d "$CLONE_DIR/.git" ]]; then
100 |   echo "Repo exists, switching to branch '$BRANCH_NAME' and pulling latest..."
101 |   git -C "$CLONE_DIR" fetch --prune origin
102 |   git -C "$CLONE_DIR" checkout -B "$BRANCH_NAME" "origin/$BRANCH_NAME"
103 |   git -C "$CLONE_DIR" pull --ff-only origin "$BRANCH_NAME"
104 | else
105 |   echo "Cloning repo branch '$BRANCH_NAME'..."
106 |   if [[ -d "$CLONE_DIR" ]]; then
107 |     rm -rf "$CLONE_DIR"
108 |   fi
109 |   git clone --branch "$BRANCH_NAME" --single-branch "$REPO_URL" "$CLONE_DIR"
110 | fi
111 | # --- end repo setup ---
112 | 
113 | REPO_DIR="$(cd "$CLONE_DIR" && pwd)"
114 | 
115 | export PYTHONPATH="$REPO_PATH:$REPO_DIR:${PYTHONPATH:-}"
116 | export APIMESH_CONFIG_PATH="$REPO_DIR/config.yml"
117 | export APIMESH_USER_CONFIG_PATH="$APIMESH_DIR/config.json"
118 | export APIMESH_USER_REPO_PATH="$REPO_PATH"
119 | export APIMESH_OUTPUT_FILEPATH="$APIMESH_DIR/swagger.json"
120 | 
121 | 
122 | cd "$REPO_DIR"
123 | python3 -m swagger_generation_cli "$OPENAI_API_KEY" "$PROJECT_API_KEY" "$AI_CHAT_ID" true
124 | 
125 | exit 0
126 | 


--------------------------------------------------------------------------------
/config.yml:
--------------------------------------------------------------------------------
  1 | # config.yml
  2 | ignored_dirs:
  3 |   - .git
  4 |   - node_modules
  5 |   - venv
  6 |   - __pycache__
  7 |   - build
  8 |   - dist
  9 |   - tests
 10 |   - test
 11 |   - docs
 12 |   - examples
 13 |   - migrations
 14 |   - tmp
 15 |   - vendor
 16 |   - app/assets
 17 |   - develop-eggs
 18 |   - downloads
 19 |   - eggs
 20 |   - .eggs
 21 |   - lib64
 22 |   - parts
 23 |   - sdist
 24 |   - var
 25 |   - wheels
 26 |   - .egg-info
 27 |   - .env
 28 |   - .venv
 29 |   - ENV
 30 |   - .python-version
 31 |   - .pytest_cache
 32 |   - .coverage
 33 |   - htmlcov
 34 |   - log
 35 |   - db
 36 |   - public
 37 |   - coverage
 38 |   - spec
 39 |   - bundle
 40 |   - .rvmrc
 41 |   - .byebug_history
 42 |   - storage
 43 |   - .pnp
 44 |   - .env.local
 45 |   - .env.development.local
 46 |   - .env.test.local
 47 |   - .env.production.local
 48 |   - .next
 49 |   - .nuxt
 50 |   - .DS_Store
 51 |   - .tscache
 52 |   - .angular
 53 |   - dist-types
 54 |   - target
 55 |   - .apt_generated
 56 |   - .classpath
 57 |   - .factorypath
 58 |   - .project
 59 |   - .settings
 60 |   - .springBeans
 61 |   - .sts4-cache
 62 |   - .gradle
 63 |   - logs
 64 |   - .idea
 65 |   - .vscode
 66 |   - qodexai-virtual-env
 67 |   - apimesh
 68 | 
 69 | routing_patterns_map:
 70 |   ruby_on_rails:
 71 |     - '\bresources\b.*:'
 72 |     - 'namespace\b.*'''
 73 |     - 'Rails\.application\.routes\.draw'
 74 |     - 'root\s+(?:''|")'
 75 |     - 'get\s+[''"]/\w+'
 76 |     - 'post\s+[''"]/\w+'
 77 |     - 'put\s+[''"]/\w+'
 78 |     - 'delete\s+[''"]/\w+'
 79 | 
 80 |   django:
 81 |     - 'path\([''"]'
 82 |     - 'include\([''"]'
 83 |     - 'url\([''"]'
 84 |     - 'urlpatterns\s*='
 85 |     - '@route\([''"]'
 86 |     - '\.(?:get|post|put|delete)_api\('
 87 |     - '@api_view\(\[[''""](?:GET|POST|PUT|DELETE)[''"]'
 88 |     - "ListAPIView"
 89 |     - "CreateAPIView"
 90 |     - "UpdateAPIView"
 91 |     - "DestroyAPIView"
 92 | 
 93 |   express:
 94 |     - 'app\.(?:get|post|put|delete)\([''"]'
 95 |     - 'router\.(?:get|post|put|delete)\([''"]'
 96 |     - 'express\.Router\(\)'
 97 |     - 'app\.use\([''"]'
 98 | 
 99 |   flask:
100 |     - '@app\.route\([''"]'
101 |     - 'app\.(?:get|post|put|delete)\([''"]'
102 |     - '@blueprint\.route\([''"]'
103 |     - 'flask\.Blueprint\('
104 |     - 'app\.register_blueprint\('
105 |     - '@\w+\.route\([''"]'
106 |     - 'Api\('
107 |     - 'Resource\)'
108 |     - 'def (?:get|post|put|delete)\('
109 | 
110 |   fastapi:
111 |     - '@app\.(?:get|post|put|delete)\([''"]'
112 |     - '@router\.(?:get|post|put|delete)\([''"]'
113 |     - 'APIRouter\(\)'
114 |     - 'app\.include_router\('
115 |     - '@app\.middleware\([''"]'
116 | 
117 |   laravel:
118 |     - 'Route::(?:get|post|put|delete)\([''"]'
119 |     - 'Route::resource\([''"]'
120 |     - 'Route::group\('
121 |     - '->middleware\([''"]'
122 | 
123 |   spring:
124 |     - '@RequestMapping\([''"]'
125 |     - '@GetMapping\([''"]'
126 |     - '@PostMapping\([''"]'
127 |     - '@PutMapping\([''"]'
128 |     - '@DeleteMapping\([''"]'
129 |     - "@RestController"
130 |     - "@Controller"
131 |     - "@RequestParam"
132 |     - "@PathVariable"
133 | 
134 |   golang:
135 |     # net/http (standard library)
136 |     - 'http\.HandleFunc\([''"]'
137 |     - 'http\.Handle\([''"]'
138 |     - 'http\.ServeMux\{'
139 | 
140 |     # gorilla/mux
141 |     - 'mux\.HandleFunc\([''"]'
142 |     - 'mux\.Handle\([''"]'
143 |     - 'mux\.NewRouter\('
144 | 
145 |     # gin-gonic/gin
146 |     - 'gin\.Default\('
147 |     - 'gin\.New\('
148 |     - '\b\w+\.(GET|POST|PUT|DELETE|PATCH|OPTIONS|HEAD)\([''"]'
149 | 
150 |     # echo
151 |     - 'echo\.New\('
152 |     - '\b\w+\.(GET|POST|PUT|DELETE|PATCH|OPTIONS|HEAD)\([''"]'
153 | 
154 |     # fiber
155 |     - 'fiber\.New\('
156 |     - '\b\w+\.(Get|Post|Put|Delete|Patch|Options|Head)\([''"]'
157 | 
158 |   nestjs:
159 |     - '@Controller\\([''"]'
160 |     - '@(Get|Post|Put|Delete|Patch|Options|Head)\\('
161 |     - 'NestFactory\\.create\\('
162 |     - 'app\\.select\\('
163 |     - 'app\\.useGlobalPipes\\('
164 |     - '@Module\\('
165 | 
166 | gpt_4o_model_name: "gpt-5.1-codex"
167 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | 
  3 | set -e
  4 | 
  5 | # Configuration
  6 | APIMESH_DIR="apimesh"
  7 | VENV_DIR="$APIMESH_DIR/qodexai-virtual-env"
  8 | REPO_URL="https://github.com/qodex-ai/apimesh.git"
  9 | CLONE_DIR="$APIMESH_DIR/apimesh"
 10 | CURRENT_DIR="$(pwd)"
 11 | 
 12 | # Default values for optional parameters
 13 | OPENAI_API_KEY=""
 14 | PROJECT_API_KEY=""
 15 | AI_CHAT_ID=""
 16 | 
 17 | # Parse command line arguments
 18 | while [[ $# -gt 0 ]]; do
 19 |   case "$1" in
 20 |     --openai-api-key)
 21 |       OPENAI_API_KEY="$2"
 22 |       shift 2
 23 |       ;;
 24 |     --project-api-key)
 25 |       PROJECT_API_KEY="$2"
 26 |       shift 2
 27 |       ;;
 28 |     --ai-chat-id)
 29 |       AI_CHAT_ID="$2"
 30 |       shift 2
 31 |       ;;
 32 |     *)
 33 |       echo "Unknown option: $1"
 34 |       echo "Usage: $0 [--openai-api-key KEY] [--project-api-key KEY] [--ai-chat-id ID]"
 35 |       exit 1
 36 |       ;;
 37 |   esac
 38 | done
 39 | 
 40 | # Cleanup function
 41 | cleanup() {
 42 |     local exit_code=$?
 43 |     trap - EXIT
 44 |     
 45 |     # Deactivate virtual environment if active
 46 |     if command -v deactivate >/dev/null 2>&1; then
 47 |         deactivate >/dev/null 2>&1 || true
 48 |     fi
 49 |     
 50 |     # Remove cloned repository
 51 |     if [[ -d "$CLONE_DIR" ]]; then
 52 |         echo "Removing cloned repository at '$CLONE_DIR'"
 53 |         rm -rf "$CLONE_DIR"
 54 |     fi
 55 |     
 56 |     # Remove virtual environment
 57 |     if [[ -d "$VENV_DIR" ]]; then
 58 |         echo "Removing virtual environment at '$VENV_DIR'"
 59 |         rm -rf "$VENV_DIR"
 60 |     fi
 61 |     
 62 |     # Remove this script itself (last step)
 63 |     if [[ -f "$0" ]]; then
 64 |         echo "Removing script '$0'"
 65 |         rm -f "$0"
 66 |     fi
 67 |     
 68 |     exit "$exit_code"
 69 | }
 70 | 
 71 | # Set trap for cleanup on exit
 72 | trap cleanup EXIT
 73 | 
 74 | # Step 1: Create apimesh folder
 75 | echo "Creating apimesh folder..."
 76 | mkdir -p "$APIMESH_DIR"
 77 | echo "Created folder: $APIMESH_DIR"
 78 | echo ""
 79 | 
 80 | # Step 2: Create Python virtual environment
 81 | echo "Creating Python virtual environment..."
 82 | if [[ -d "$VENV_DIR" ]]; then
 83 |     echo "Virtual environment already exists at '$VENV_DIR'. Removing it..."
 84 |     rm -rf "$VENV_DIR"
 85 | fi
 86 | 
 87 | python3 -m venv "$VENV_DIR"
 88 | echo "Virtual environment created at '$VENV_DIR'"
 89 | echo ""
 90 | 
 91 | # Step 3: Activate virtual environment and install dependencies
 92 | echo "Activating virtual environment..."
 93 | source "$VENV_DIR/bin/activate"
 94 | echo "Virtual environment activated"
 95 | echo ""
 96 | 
 97 | echo "Installing Python dependencies..."
 98 | pip3 install --quiet --upgrade pip
 99 | pip3 install \
100 |   "langchain==0.3.16" \
101 |   "langchain-community==0.3.16" \
102 |   "langchain-core==0.3.63" \
103 |   "langchain-openai==0.3.5" \
104 |   "langsmith==0.1.139" \
105 |   "openai==1.76.0" \
106 |   "tiktoken==0.8.0" \
107 |   "faiss-cpu==1.9.0.post1" \
108 |   "langchain-text-splitters==0.3.4" \
109 |   "pyyaml==6.0.2" \
110 |   "numpy<2" \
111 |   "tree-sitter==0.25.1" \
112 |   "tree-sitter-python==0.23.6" \
113 |   "tree-sitter-javascript==0.23.1" \
114 |   "tree-sitter-ruby==0.23.1" \
115 |   "tree-sitter-go==0.25.0" \
116 |   "tree-sitter-typescript==0.23.2" \
117 |   "esprima==4.0.1" \
118 |   "requests"
119 | echo "Dependencies installed"
120 | echo ""
121 | 
122 | # Step 4: Clone the repository
123 | echo "Cloning repository from $REPO_URL..."
124 | if [[ -d "$CLONE_DIR" ]]; then
125 |     echo "Repository already exists at '$CLONE_DIR'. Removing it..."
126 |     rm -rf "$CLONE_DIR"
127 | fi
128 | 
129 | git clone "$REPO_URL" "$CLONE_DIR"
130 | echo "Repository cloned to '$CLONE_DIR'"
131 | echo ""
132 | 
133 | # Step 5: Run the swagger generation CLI
134 | echo "Running swagger generation CLI..."
135 | echo "REPO_PATH: $CURRENT_DIR"
136 | echo "OPENAI_API_KEY: ${OPENAI_API_KEY:+***}"
137 | echo "PROJECT_API_KEY: ${PROJECT_API_KEY:+***}"
138 | echo "AI_CHAT_ID: ${AI_CHAT_ID:+***}"
139 | echo ""
140 | 
141 | # Add current directory and cloned directory to PYTHONPATH so Python can find modules
142 | export PYTHONPATH="$CURRENT_DIR:$CLONE_DIR:$PYTHONPATH"
143 | 
144 | # Set config paths
145 | export APIMESH_CONFIG_PATH="$CLONE_DIR/config.yml"
146 | export APIMESH_USER_CONFIG_PATH="$CURRENT_DIR/apimesh/config.json"
147 | export APIMESH_USER_REPO_PATH="$CURRENT_DIR"
148 | export APIMESH_OUTPUT_FILEPATH="$CURRENT_DIR/apimesh/swagger.json"
149 | 
150 | python3 -m apimesh.apimesh.swagger_generation_cli "$OPENAI_API_KEY" "$PROJECT_API_KEY" "$AI_CHAT_ID"
151 | 
152 | CLI_EXIT_CODE=$?
153 | 
154 | echo ""
155 | echo "Swagger generation finished with status $CLI_EXIT_CODE."
156 | 
157 | # Cleanup will happen automatically via trap
158 | exit "$CLI_EXIT_CODE"
159 | 


--------------------------------------------------------------------------------
/endpoints_extractor.py:
--------------------------------------------------------------------------------
  1 | import ast
  2 | from llm_client import OpenAiClient
  3 | from config import Configurations
  4 | import prompts
  5 | from concurrent.futures import ThreadPoolExecutor, as_completed
  6 | import time
  7 | 
  8 | config = Configurations()
  9 | 
 10 | class EndpointsExtractor:
 11 |     def __init__(self):
 12 |         self.openai_client = OpenAiClient()
 13 | 
 14 |     def extract_endpoints_with_gpt(self, file_path, framework):
 15 |         print("\n***************************************************")
 16 |         print(f"Started finding endpoints for {file_path}")
 17 |         with open(file_path, 'r', encoding='utf-8') as file:
 18 |             file_content = file.read()
 19 |         if framework == "ruby_on_rails":
 20 |             content = prompts.ruby_on_rails_endpoint_extractor_prompt.format(file_content = file_content)
 21 |             messages = [
 22 |                 {"role": "system", "content": prompts.ruby_on_rails_endpoint_extractor_system_prompt},
 23 |                 {"role": "user", "content": content}
 24 |             ]
 25 |         elif framework == "express":
 26 |             content = prompts.express_endpoint_extractor_prompt.format(file_content = file_content)
 27 |             messages = [
 28 |                 {"role": "system", "content": prompts.express_endpoint_extractor_system_prompt},
 29 |                 {"role": "user", "content": content}
 30 |             ]
 31 |         elif framework == "django":
 32 |             content = prompts.django_endpoint_extractor_prompt.format(file_content = file_content)
 33 |             messages = [
 34 |                 {"role": "system", "content": prompts.django_endpoint_extractor_system_prompt},
 35 |                 {"role": "user", "content": content}
 36 |             ]
 37 | 
 38 |         elif framework == "flask":
 39 |             content = prompts.flask_endpoint_extractor_prompt.format(file_content = file_content)
 40 |             messages = [
 41 |                 {"role": "system", "content": prompts.flask_endpoint_extractor_system_prompt},
 42 |                 {"role": "user", "content": content}
 43 |             ]
 44 |         elif framework == "fastapi":
 45 |             content = prompts.fastapi_endpoint_extractor_prompt.format(file_content = file_content)
 46 |             messages = [
 47 |                 {"role": "system", "content": prompts.fastapi_endpoint_extractor_system_prompt},
 48 |                 {"role": "user", "content": content}
 49 |             ]
 50 | 
 51 |         elif framework == "golang":
 52 |             content = prompts.golang_endpoint_extractor_prompt.format(file_content=file_content)
 53 |             messages = [
 54 |                 {"role": "system", "content": prompts.golang_endpoint_extractor_system_prompt},
 55 |                 {"role": "user", "content": content}
 56 |             ]
 57 |         # Call the OpenAI API
 58 |         response = self.openai_client.call_chat_completion(messages=messages, temperature=0)
 59 |         start = response.find('[')
 60 |         end = response.rfind(']') + 1
 61 |         json_like_string = response[start:end]
 62 | 
 63 |         try:
 64 |             # Convert the JSON-like string to a Python list
 65 |             parsed_list = ast.literal_eval(json_like_string)
 66 |         except (ValueError, SyntaxError):
 67 |             print("Error parsing JSON-like string from GPT response")
 68 |             parsed_list = []
 69 | 
 70 |         print(f"Completed finding endpoints for {file_path}")
 71 |         return parsed_list
 72 | 
 73 |     @staticmethod
 74 |     def get_endpoint_related_information(faiss_vector_db, endpoints):
 75 |         print("\n***************************************************")
 76 |         print(f"Started generating endpoint related information for {len(endpoints)} endpoints")
 77 |         start_time = time.time()
 78 |         completed = 0
 79 | 
 80 |         def process_endpoint(endpoint):
 81 |             query = f"This is the Method: {endpoint['method']} and this is the Endpoint Path: {endpoint['path']} fetch the controller information for the endpoint."
 82 |             docs = faiss_vector_db.similarity_search(str(query), k=4)
 83 |             content_list = [doc.page_content.strip() for doc in docs]
 84 |             return {'method': endpoint['method'], 'path': endpoint['path'], 'info': content_list}
 85 | 
 86 |         endpoint_related_content = []
 87 |         with ThreadPoolExecutor(max_workers=8) as executor:
 88 |             future_to_endpoint = {executor.submit(process_endpoint, endpoint): endpoint
 89 |                                   for endpoint in endpoints}
 90 | 
 91 |             for future in as_completed(future_to_endpoint):
 92 |                 endpoint_related_content.append(future.result())
 93 |                 completed += 1
 94 |                 end_time = time.time()
 95 |                 print(
 96 |                     f"Completed generating endpoint related information for {completed} endpoints in {int(end_time - start_time)} seconds",
 97 |                     end="\r")
 98 | 
 99 |         return endpoint_related_content
100 | 


--------------------------------------------------------------------------------
/python_pipeline/identify_api_functions.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | import ast
  3 | import json
  4 | 
  5 | API_DECORATOR_NAMES = {
  6 |     'route', 'get', 'post', 'put', 'delete', 'patch',
  7 |     'api', 'endpoint', 'router', 'viewset', 'view'
  8 | }
  9 | 
 10 | def has_api_decorator(decorator_node):
 11 |     if isinstance(decorator_node, ast.Call) and hasattr(decorator_node.func, 'attr'):
 12 |         if decorator_node.func.attr.lower() in API_DECORATOR_NAMES:
 13 |             return True
 14 |     if isinstance(decorator_node, ast.Attribute):
 15 |         if decorator_node.attr.lower() in API_DECORATOR_NAMES:
 16 |             return True
 17 |     if isinstance(decorator_node, ast.Name):
 18 |         if decorator_node.id.lower() in API_DECORATOR_NAMES:
 19 |             return True
 20 |     return False
 21 | 
 22 | 
 23 | def extract_route_from_decorator(decorator_node):
 24 |     if isinstance(decorator_node, ast.Call):
 25 |         if decorator_node.args:
 26 |             first_arg = decorator_node.args[0]
 27 |             if isinstance(first_arg, ast.Str):
 28 |                 return first_arg.s
 29 |             elif isinstance(first_arg, ast.Constant):  # For Python 3.8+
 30 |                 if isinstance(first_arg.value, str):
 31 |                     return first_arg.value
 32 |     return None
 33 | 
 34 | 
 35 | def find_api_endpoints(file_path):
 36 |     try:
 37 |         source = file_path.read_text(encoding='utf-8')
 38 |         tree = ast.parse(source, filename=str(file_path))
 39 |     except Exception:
 40 |         return []
 41 |     endpoints = []
 42 |     class_endpoints = {}
 43 |     for node in ast.walk(tree):
 44 |         if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and not isinstance(getattr(node, 'parent', None),
 45 |                                                                                         ast.ClassDef):
 46 |             for dec in node.decorator_list:
 47 |                 if has_api_decorator(dec):
 48 |                     route = extract_route_from_decorator(dec)
 49 |                     endpoints.append({
 50 |                         "type": "function",
 51 |                         "name": node.name,
 52 |                         "start_line": node.lineno,
 53 |                         "end_line": getattr(node, 'end_lineno', None),
 54 |                         "route": route,
 55 |                         "file_path": str(file_path)
 56 |                     })
 57 |         if isinstance(node, ast.ClassDef):
 58 |             class_has_decorator = any(has_api_decorator(dec) for dec in node.decorator_list)
 59 |             class_route = None
 60 |             for dec in node.decorator_list:
 61 |                 if has_api_decorator(dec):
 62 |                     class_route = extract_route_from_decorator(dec)
 63 |                     break
 64 |             if class_has_decorator:
 65 |                 class_endpoint = {
 66 |                     "type": "class",
 67 |                     "name": node.name,
 68 |                     "start_line": node.lineno,
 69 |                     "end_line": getattr(node, 'end_lineno', None),
 70 |                     "route": class_route,
 71 |                     "file_path": str(file_path),
 72 |                     "methods": []
 73 |                 }
 74 |                 class_endpoints[node.name] = class_endpoint
 75 |                 endpoints.append(class_endpoint)
 76 |             for body_item in node.body:
 77 |                 if isinstance(body_item, (ast.FunctionDef, ast.AsyncFunctionDef)):
 78 |                     method_route = None
 79 |                     method_has_decorator = any(has_api_decorator(dec) for dec in body_item.decorator_list)
 80 |                     if method_has_decorator:
 81 |                         for dec in body_item.decorator_list:
 82 |                             if has_api_decorator(dec):
 83 |                                 method_route = extract_route_from_decorator(dec)
 84 |                                 if method_route:
 85 |                                     break
 86 |                     if method_has_decorator or class_has_decorator:
 87 |                         method_entry = {
 88 |                             "type": "method",
 89 |                             "name": body_item.name,
 90 |                             "start_line": body_item.lineno,
 91 |                             "end_line": getattr(body_item, 'end_lineno', None),
 92 |                             "route": method_route if method_route else class_route,
 93 |                             "file_path": str(file_path)
 94 |                         }
 95 |                         if node.name in class_endpoints:
 96 |                             class_endpoints[node.name]["methods"].append(method_entry)
 97 |     return endpoints
 98 | 
 99 | 
100 | def set_parents(tree):
101 |     for node in ast.walk(tree):
102 |         for child in ast.iter_child_nodes(node):
103 |             child.parent = node
104 | 
105 | 
106 | if __name__ == "__main__":
107 |     api_files = ['/Users/ankits/PycharmProjects/data-science-model-serving/app.py', '/Users/ankits/PycharmProjects/data-science-model-serving/apps/training/run.py', '/Users/ankits/PycharmProjects/data-science-model-serving/apps/prediction/run.py']
108 |     py_files = [Path(file) for file in api_files]  # Convert to Path objects
109 |     all_endpoints = []
110 |     for py_file in py_files:
111 |         try:
112 |             source = py_file.read_text(encoding="utf-8")
113 |             tree = ast.parse(source)
114 |             set_parents(tree)
115 |             eps = find_api_endpoints(py_file)
116 |             if eps:
117 |                 all_endpoints.extend(eps)
118 |         except Exception:
119 |             continue
120 |     print(json.dumps(all_endpoints, indent=2))


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
  1 | # Contributor Covenant Code of Conduct
  2 | 
  3 | ## Our Pledge
  4 | 
  5 | We as members, contributors, and leaders pledge to make participation in our
  6 | community a harassment-free experience for everyone, regardless of age, body
  7 | size, visible or invisible disability, ethnicity, sex characteristics, gender
  8 | identity and expression, level of experience, education, socio-economic status,
  9 | nationality, personal appearance, race, religion, or sexual identity
 10 | and orientation.
 11 | 
 12 | We pledge to act and interact in ways that contribute to an open, welcoming,
 13 | diverse, inclusive, and healthy community.
 14 | 
 15 | ## Our Standards
 16 | 
 17 | Examples of behavior that contributes to a positive environment for our
 18 | community include:
 19 | 
 20 | * Demonstrating empathy and kindness toward other people
 21 | * Being respectful of differing opinions, viewpoints, and experiences
 22 | * Giving and gracefully accepting constructive feedback
 23 | * Accepting responsibility and apologizing to those affected by our mistakes,
 24 |   and learning from the experience
 25 | * Focusing on what is best not just for us as individuals, but for the
 26 |   overall community
 27 | 
 28 | Examples of unacceptable behavior include:
 29 | 
 30 | * The use of sexualized language or imagery, and sexual attention or
 31 |   advances of any kind
 32 | * Trolling, insulting or derogatory comments, and personal or political attacks
 33 | * Public or private harassment
 34 | * Publishing others' private information, such as a physical or email
 35 |   address, without their explicit permission
 36 | * Other conduct which could reasonably be considered inappropriate in a
 37 |   professional setting
 38 | 
 39 | ## Enforcement Responsibilities
 40 | 
 41 | Community leaders are responsible for clarifying and enforcing our standards of
 42 | acceptable behavior and will take appropriate and fair corrective action in
 43 | response to any behavior that they deem inappropriate, threatening, offensive,
 44 | or harmful.
 45 | 
 46 | Community leaders have the right and responsibility to remove, edit, or reject
 47 | comments, commits, code, wiki edits, issues, and other contributions that are
 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation
 49 | decisions when appropriate.
 50 | 
 51 | ## Scope
 52 | 
 53 | This Code of Conduct applies within all community spaces, and also applies when
 54 | an individual is officially representing the community in public spaces.
 55 | Examples of representing our community include using an official e-mail address,
 56 | posting via an official social media account, or acting as an appointed
 57 | representative at an online or offline event.
 58 | 
 59 | ## Enforcement
 60 | 
 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be
 62 | reported to the community leaders responsible for enforcement at
 63 | support@qodex.ai.
 64 | All complaints will be reviewed and investigated promptly and fairly.
 65 | 
 66 | All community leaders are obligated to respect the privacy and security of the
 67 | reporter of any incident.
 68 | 
 69 | ## Enforcement Guidelines
 70 | 
 71 | Community leaders will follow these Community Impact Guidelines in determining
 72 | the consequences for any action they deem in violation of this Code of Conduct:
 73 | 
 74 | ### 1. Correction
 75 | 
 76 | **Community Impact**: Use of inappropriate language or other behavior deemed
 77 | unprofessional or unwelcome in the community.
 78 | 
 79 | **Consequence**: A private, written warning from community leaders, providing
 80 | clarity around the nature of the violation and an explanation of why the
 81 | behavior was inappropriate. A public apology may be requested.
 82 | 
 83 | ### 2. Warning
 84 | 
 85 | **Community Impact**: A violation through a single incident or series
 86 | of actions.
 87 | 
 88 | **Consequence**: A warning with consequences for continued behavior. No
 89 | interaction with the people involved, including unsolicited interaction with
 90 | those enforcing the Code of Conduct, for a specified period of time. This
 91 | includes avoiding interactions in community spaces as well as external channels
 92 | like social media. Violating these terms may lead to a temporary or
 93 | permanent ban.
 94 | 
 95 | ### 3. Temporary Ban
 96 | 
 97 | **Community Impact**: A serious violation of community standards, including
 98 | sustained inappropriate behavior.
 99 | 
100 | **Consequence**: A temporary ban from any sort of interaction or public
101 | communication with the community for a specified period of time. No public or
102 | private interaction with the people involved, including unsolicited interaction
103 | with those enforcing the Code of Conduct, is allowed during this period.
104 | Violating these terms may lead to a permanent ban.
105 | 
106 | ### 4. Permanent Ban
107 | 
108 | **Community Impact**: Demonstrating a pattern of violation of community
109 | standards, including sustained inappropriate behavior,  harassment of an
110 | individual, or aggression toward or disparagement of classes of individuals.
111 | 
112 | **Consequence**: A permanent ban from any sort of public interaction within
113 | the community.
114 | 
115 | ## Attribution
116 | 
117 | This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), [version 2.0](https://www.contributor-covenant.org/version/2/0/code_of_conduct.html).
118 | 
119 | Community Impact Guidelines were inspired by [Mozilla's code of conduct
120 | enforcement ladder](https://github.com/mozilla/diversity).
121 | 
122 | 
123 | - For answers to common questions about this code of conduct, see the FAQs at
124 | https://www.contributor-covenant.org/faq.
125 | - Translations are available at
126 | https://www.contributor-covenant.org/translations.
127 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # ApiMesh: Code to OpenAPI Docs, Instantly
  2 | 
  3 | [![Python Version](https://img.shields.io/badge/python-3.10%2B-blue.svg)](https://www.python.org/downloads/)
  4 | [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
  5 | [![Docker Build](https://img.shields.io/badge/docker%20build-passing-22c55e?logo=docker&logoColor=white)](https://github.com/qodex-ai/apimesh/actions/workflows/docker-build.yml)
  6 | [![Tutorial](https://img.shields.io/badge/%F0%9F%93%96%20tutorial-get%20started-brightgreen.svg)](#quick-start-30-seconds)
  7 | [![Examples](https://img.shields.io/badge/%F0%9F%9A%80%20examples-ready--to--run-orange.svg)](#quick-start-30-seconds)
  8 | [![Discord](https://img.shields.io/badge/Discord-Join%20Community-5865f2?logo=discord&logoColor=white)](https://discord.gg/MHDayrP7)
  9 | [![Twitter](https://img.shields.io/badge/Twitter-Follow%20Updates-1da1f2?logo=x&logoColor=white)](https://x.com/qodex_ai)
 10 | 
 11 | **Open-Source OpenAPI Generator** – Automatically scan your codebase, generate **accurate OpenAPI 3.0 specs**, and render a **beautiful interactive HTML API UI** for instant exploration.
 12 | 
 13 | **From code to live API docs in seconds** — no manual writing, no drift, no hassle.
 14 | 
 15 | ---
 16 | 
 17 | ## Overview
 18 | 
 19 | **ApiMesh** is the AI-powered open-source tool that:
 20 | 
 21 | - Scans your codebase automatically.
 22 | - Discovers all REST API endpoints, parameters, auth, and schemas.
 23 | - Generates a **valid `swagger.json` (OpenAPI 3.0)**.
 24 | - **Renders `apimesh-docs.html`** — a **fully interactive API UI** powered by Swagger UI.
 25 | 
 26 | ![img.png](img.png)
 27 | > **Open the HTML file in any browser. No server. No setup. Just click and explore.**
 28 | 
 29 | ---
 30 | 
 31 | ### ✨ Key Features
 32 | 
 33 | | Feature | Benefit |
 34 | |-------|--------|
 35 | | 🔍 **Smart Code Discovery** | Finds endpoints across frameworks — no annotations needed |
 36 | | 📄 **OpenAPI 3.0 Spec** | `swagger.json` ready for CI/CD, gateways, and tools |
 37 | | 🌐 **Interactive HTML UI** | `apimesh-docs.html` — **instant API playground** with try-it-out |
 38 | | 🌍 **Multi-Language** | Python, Node.js, Ruby on Rails, Go, and more |
 39 | | ⚡ **Zero Config Start** | One command → full docs + UI |
 40 | | 📱 **Self-Contained HTML** | Share via email, GitHub, or CDN — works offline |
 41 | 
 42 | ---
 43 | 
 44 | ### 🧠 How It Works
 45 | 
 46 | A **precise, AI-augmented pipeline** ensures reliable, up-to-date docs:
 47 | 
 48 | 1. **Scan Repo** → `FileScanner` walks your code (respects `.gitignore` + `config.yml`)
 49 | 2. **Detect Framework** → Heuristics + LLM identify Express, FastAPI, Rails, etc.
 50 | 3. **Harvest Endpoints** → Native parsers + LLM extract routes, methods, schemas
 51 | 4. **Enrich Context** → Vector embeddings pull auth, models, examples per endpoint
 52 | 5. **Generate Spec** → `swagger.json` built with OpenAI precision
 53 | 6. **Render UI** → **`apimesh-docs.html`** embedded with **Swagger UI** — fully interactive
 54 | 7. **Optional Sync** → Push to **Qodex.ai** for auto-tests and security scans
 55 | 
 56 | ---
 57 | 
 58 | ### 🌐 Supported Languages & Frameworks
 59 | 
 60 | | Language | Frameworks | Detection Method |
 61 | |--------|------------|------------------|
 62 | | **Python** | Django, Flask, FastAPI, DRF | Route files + decorators |
 63 | | **Node.js / TS** | Express, NestJS | `app.get`, `Router`, decorators |
 64 | | **Ruby on Rails** | Rails | `routes.rb` + controllers |
 65 | | **Go** | Gin, Echo, Fiber, Chi, Gorilla Mux, net/http | Tree-sitter router analysis |
 66 | | **Java, etc.** | Any REST | LLM fallback + patterns |
 67 | 
 68 | > Add custom patterns in `config.yml` — PRs welcome!
 69 | 
 70 | ---
 71 | 
 72 | ### 📂 Output Files
 73 | 
 74 | | File | Location | Purpose |
 75 | |------|----------|--------|
 76 | | `swagger.json` | `apimesh/swagger.json` | OpenAPI 3.0 spec |
 77 | | **`apimesh-docs.html`** | `apimesh/apimesh-docs.html` | **Interactive API UI** — open in browser |
 78 | | `config.json` | `apimesh/config.json` | Persisted CLI configuration (repo path, host, API keys) |
 79 | | `config.yml` | Repo root | Customize scan, host, ignores |
 80 | 
 81 | > **Deploy `apimesh-docs.html` to GitHub Pages, Netlify, or Vercel in 1 click.**
 82 | 
 83 | ---
 84 | 
 85 | ## Quick Start (30 Seconds)
 86 | 
 87 | ### Option 1: docker (Recommended)
 88 | 
 89 | Navigate to your repository
 90 | ```bash
 91 | cd /path/to/your/repo
 92 | ```
 93 | 
 94 | Run interactively - will prompt for any missing inputs
 95 | ```bash
 96 | docker run --pull always -it --rm -v $(pwd):/workspace qodexai/apimesh:latest
 97 | ```
 98 | 
 99 | ### Option 2: Using MCP
100 | 
101 | Download the MCP server file
102 | 
103 | ```bash
104 | curl https://raw.githubusercontent.com/qodex-ai/apimesh/main/swagger_mcp.py -o swagger_mcp.py
105 | ```
106 | 
107 | Add this to your MCP settings
108 | ```bash
109 | {
110 |   "mcpServers": {
111 |     "apimesh": {
112 |       "command": "uv",
113 |       "args": ["run", "/path/to/swagger_mcp/swagger_mcp.py"]
114 |     }
115 |   }
116 | }
117 | ```
118 | 
119 | Replace /path/to/swagger_mcp/swagger_mcp.py with the actual file path.
120 | 
121 | 
122 | ### Option 3: Curl
123 | 
124 | Navigate to your repository
125 | ```bash
126 | cd /path/to/your/repo
127 | ```
128 | 
129 | Inside your repo root
130 | ```bash
131 | mkdir -p apimesh && \
132 |   curl -sSL https://raw.githubusercontent.com/qodex-ai/apimesh/refs/heads/main/run.sh -o apimesh/run.sh && \
133 |   chmod +x apimesh/run.sh && apimesh/run.sh
134 | ```
135 | 
136 | > Each run leaves `swagger.json`, `apimesh-docs.html`, `run.sh`, and `config.json` side-by-side inside the `apimesh/` workspace folder.
137 | 
138 | ---
139 | 
140 | ## 🤝 Contributing
141 | 
142 | Contributions are welcome!
143 | 
144 | Open an issue for bugs, feature requests, or improvements.
145 | 
146 | Submit PRs to enhance language/framework coverage.
147 | 
148 | Help us make API documentation automatic and effortless 🚀
149 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
  1 | # Contributing to Swagger Generator
  2 | 
  3 | First off — thanks for taking the time to contribute! 🎉  
  4 | This document explains how to propose changes, report issues, and help improve the project.
  5 | 
  6 | > **Project summary:** Swagger Generator analyzes a codebase and produces an OpenAPI (Swagger) JSON. You can run it via a one-liner shell script or as an MCP server. See the [README](./README.md) for setup and usage details.
  7 | 
  8 | ---
  9 | 
 10 | ## 📜 Code of Conduct
 11 | 
 12 | By participating, you agree to uphold our [Code of Conduct](./CODE_OF_CONDUCT.md).  
 13 | If you witness or experience unacceptable behavior, please report it per that document.
 14 | 
 15 | ## 🔒 Security
 16 | 
 17 | Please **do not** open public issues for security vulnerabilities.  
 18 | Follow the responsible disclosure process in our [Security Policy](./security.md).
 19 | 
 20 | ## 🪪 License
 21 | 
 22 | By contributing, you agree that your contributions will be licensed under the
 23 | [AGPL-3.0 License](./LICENSE.md).
 24 | 
 25 | ---
 26 | 
 27 | ## 🧭 How to Contribute
 28 | 
 29 | ### 1) Report bugs & request features
 30 | - Search existing [Issues](https://github.com/qodex-ai/apimesh/issues) first.
 31 | - If none exist, open a new issue with:
 32 |   - **What happened** and **what you expected**
 33 |   - **Steps to reproduce** (repo, command, flags, logs)
 34 |   - Environment details (OS, Python version, shell)
 35 | 
 36 | ### 2) Propose improvements
 37 | - For larger changes, open an issue first to discuss design/approach.
 38 | - Small fixes (typos, docs, comments) can go straight to a PR.
 39 | 
 40 | ---
 41 | 
 42 | ## 🛠️ Development Setup
 43 | 
 44 | > The repo primarily contains Python and a couple of shell scripts. You can run the tool either via the helper script or directly as an MCP server.
 45 | 
 46 | ### Prerequisites
 47 | - A recent Python 3.x
 48 | - Git + a shell (bash/zsh)
 49 | - (Optional) [uv](https://docs.astral.sh/uv/) or a virtual environment tool
 50 | 
 51 | ### Get the code
 52 | ```bash
 53 | git clone https://github.com/qodex-ai/apimesh.git
 54 | cd apimesh
 55 | ```
 56 | 
 57 | ### Running the generator (two common paths)
 58 | 
 59 | **A) One-liner script (quickest)**  
 60 | Create a dedicated `apimesh` workspace folder inside the repo you want to analyze.
 61 | ```bash
 62 | # Fetch and run the helper script (see README for the latest command/flags)
 63 | mkdir -p apimesh
 64 | cd apimesh
 65 | curl -sSL https://raw.githubusercontent.com/qodex-ai/apimesh/refs/heads/main/run.sh -o run.sh
 66 | chmod +x run.sh
 67 | ./run.sh --repo-path .. --project-api-key {project_api_key} --ai-chat-id {ai_chat_id}
 68 | ```
 69 | 
 70 | > After completion you should always see `config.json`, `swagger.json`, `apimesh-docs.html`, and `run.sh` inside your repo's `apimesh/` workspace.
 71 | 
 72 | > The bootstrap helper removes its temporary clone and virtual environment after it finishes generating docs, so rerun the snippet whenever you need to refresh the output.
 73 | 
 74 | **B) Run as an MCP server**
 75 | ```bash
 76 | # Fetch the MCP server file if needed
 77 | # (If you already have it locally from the clone, point to that path instead)
 78 | wget https://raw.githubusercontent.com/qodex-ai/apimesh/main/swagger_mcp.py -O swagger_mcp.py
 79 | 
 80 | # Example MCP client config snippet (adjust path/command to your setup)
 81 | # {
 82 | #   "mcpServers": {
 83 | #     "apimesh": {
 84 | #       "command": "uv",
 85 | #       "args": ["run", "/absolute/path/to/swagger_mcp.py"]
 86 | #     }
 87 | #   }
 88 | # }
 89 | ```
 90 | 
 91 | > After running, you should see a `swagger.json` emitted in the target repo path.
 92 | 
 93 | ---
 94 | 
 95 | ## 🧹 Style, Linting & Commit Messages
 96 | 
 97 | We aim for clear, readable Python and tidy shell scripts.
 98 | 
 99 | - **Python**
100 |   - Prefer small, focused functions.
101 |   - Add docstrings and inline comments where logic is non-obvious.
102 |   - Keep imports organized and avoid unused imports.
103 | - **Shell**
104 |   - Use `set -euo pipefail` for robustness when appropriate.
105 |   - Quote variables; avoid bashisms if not needed.
106 | 
107 | **Commit messages**
108 | - Use present tense and be descriptive:  
109 |   `feat: add repository path validation`, `fix: handle empty swagger output`, `docs: clarify MCP setup`
110 | - Reference issues when applicable: `Fixes #123`
111 | 
112 | ---
113 | 
114 | ## ✅ Pull Request Checklist
115 | 
116 | Before you open a PR:
117 | 
118 | - [ ] The change is documented (README or inline comments as needed).
119 | - [ ] Scripts still work (`run.sh`, `bootstrap_mcp_runner.sh` if applicable).
120 | - [ ] Any new flags or behavior are reflected in the README examples.
121 | - [ ] Code is reasonably linted/typed (if you added type hints).
122 | - [ ] Tests added or manual test steps documented (see below).
123 | - [ ] No secrets or API keys committed.
124 | 
125 | Open your PR against the `main` branch and fill out the template (or describe):
126 | - **What** the change does
127 | - **Why** it’s needed
128 | - **How** you validated it
129 | 
130 | ---
131 | 
132 | ## 🧪 Testing Changes
133 | 
134 | This project currently relies primarily on **manual validation**. Please include a short note in your PR describing how you tested:
135 | 
136 | **Suggested manual test flow**
137 | 1. Choose a small public repo with a few HTTP endpoints (or a simple local sample).
138 | 2. Run the generator using your change (script or MCP path).
139 | 3. Verify a `swagger.json` was generated.
140 | 4. Open it in Swagger UI / an OpenAPI viewer to confirm endpoints, paths, and schemas look correct.
141 | 5. Try edge cases your change might affect (e.g., unusual file layout, multiple languages, missing dependencies).
142 | 
143 | If you add unit tests:
144 | - Place them under a `tests/` folder.
145 | - Keep tests hermetic; avoid requiring network access whenever possible.
146 | 
147 | ---
148 | 
149 | ## 🧱 Project Structure (high level)
150 | 
151 | - `swagger_mcp.py` — MCP server entry and core orchestration.
152 | - `legacy_swagger_pipeline.py`, `run.sh`, `bootstrap_mcp_runner.sh` — runner/helper scripts.
153 | - `ruby_dependencies.py` — language-specific helpers (example).
154 | - `README.md`, `CODE_OF_CONDUCT.md`, `security.md`, `LICENSE.md` — docs & policies.
155 | 
156 | (Filenames can evolve; check the tree for the latest layout.)
157 | 
158 | ---
159 | 
160 | ## 🗣️ Communication
161 | 
162 | - Use GitHub Issues for bugs and feature requests.
163 | - Use PR comments for code review discussions.
164 | - Be respectful, constructive, and kind (see [Code of Conduct](./CODE_OF_CONDUCT.md)).
165 | 
166 | ---
167 | 
168 | ## 🙏 Acknowledgements
169 | 
170 | Thanks for improving Swagger Generator! Every issue, PR, and suggestion helps make the tool better for everyone.
171 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import tiktoken
  2 | import subprocess
  3 | import os
  4 | import re
  5 | 
  6 | def num_tokens_from_string(string: str, encoding_name: str = "cl100k_base") -> int:
  7 |     encoding = tiktoken.get_encoding(encoding_name)
  8 |     return len(encoding.encode(string))
  9 | 
 10 | def get_repo_path() -> str:
 11 |     """
 12 |     Get the repository path from APIMESH_USER_REPO_PATH environment variable.
 13 |     
 14 |     Returns:
 15 |         Repository path as a string (assumes APIMESH_USER_REPO_PATH is always set).
 16 |     """
 17 |     repo_path = os.environ["APIMESH_USER_REPO_PATH"]
 18 |     return os.path.abspath(repo_path)
 19 | 
 20 | def get_repo_name() -> str:
 21 |     """
 22 |     Get the repository name from git remote URL.
 23 |     
 24 |     Returns:
 25 |         Repository name extracted from git remote URL, or basename of path if git remote is not available.
 26 |     """
 27 |     repo_path = get_repo_path()
 28 |     try:
 29 |         original_dir = os.getcwd()
 30 |         try:
 31 |             os.chdir(repo_path)
 32 |             result = subprocess.run(
 33 |                 ['git', 'remote', 'get-url', 'origin'],
 34 |                 capture_output=True,
 35 |                 text=True,
 36 |                 timeout=5,
 37 |                 check=False
 38 |             )
 39 |             os.chdir(original_dir)
 40 |         except Exception:
 41 |             os.chdir(original_dir)
 42 |             return os.path.basename(repo_path)
 43 |         
 44 |         if result.returncode == 0 and result.stdout:
 45 |             remote_url = result.stdout.strip()
 46 |             # Extract repo name from various git URL formats
 47 |             # SSH: git@github.com:owner/repo.git -> repo
 48 |             # HTTPS: https://github.com/owner/repo.git -> repo
 49 |             ssh_pattern = r'git@[^:]+:(?:[^/]+/)?([^/]+?)(?:\.git)?$'
 50 |             https_pattern = r'https?://(?:[^@]+@)?[^/]+/[^/]+/([^/]+?)(?:\.git)?$'
 51 |             
 52 |             ssh_match = re.match(ssh_pattern, remote_url)
 53 |             if ssh_match:
 54 |                 return ssh_match.group(1)
 55 |             
 56 |             https_match = re.match(https_pattern, remote_url)
 57 |             if https_match:
 58 |                 return https_match.group(1)
 59 |         
 60 |         # Fallback to basename if git remote doesn't match expected patterns
 61 |         return os.path.basename(repo_path)
 62 |     except Exception:
 63 |         # Fallback to basename if any error occurs
 64 |         return os.path.basename(repo_path)
 65 | 
 66 | def format_repo_name(repo_name: str) -> str:
 67 |     """
 68 |     Format repository name for display.
 69 |     Converts snake_case, kebab-case, or camelCase to Title Case with spaces.
 70 |     
 71 |     Examples:
 72 |         sample_rails_app -> Sample Rails App
 73 |         sample-rails-app -> Sample Rails App
 74 |         sampleRailsApp -> Sample Rails App
 75 |     
 76 |     Args:
 77 |         repo_name: Raw repository name
 78 |     
 79 |     Returns:
 80 |         Formatted repository name in Title Case
 81 |     """
 82 |     # Replace underscores and hyphens with spaces
 83 |     formatted = repo_name.replace('_', ' ').replace('-', ' ')
 84 |     
 85 |     # Insert spaces before capital letters (for camelCase)
 86 |     formatted = re.sub(r'([a-z])([A-Z])', r'\1 \2', formatted)
 87 |     
 88 |     # Convert to title case (capitalize first letter of each word)
 89 |     formatted = formatted.title()
 90 |     
 91 |     return formatted
 92 | 
 93 | def get_output_filepath() -> str:
 94 |     """
 95 |     Get the output filepath from APIMESH_OUTPUT_FILEPATH environment variable.
 96 |     If not set, defaults to {repo_path}/apimesh/swagger.json
 97 |     
 98 |     Returns:
 99 |         Output filepath as a string.
100 |     """
101 |     output_filepath = os.environ.get("APIMESH_OUTPUT_FILEPATH")
102 |     if output_filepath:
103 |         return os.path.abspath(output_filepath)
104 |     # Default to repo_path/apimesh/swagger.json
105 |     repo_path = get_repo_path()
106 |     default_path = os.path.join(repo_path, "apimesh", "swagger.json")
107 |     return os.path.abspath(default_path)
108 | 
109 | def get_github_repo_url() -> str:
110 |     """
111 |     Get the GitHub repository URL from git remote.
112 |     Uses APIMESH_USER_REPO_PATH environment variable to determine the repository path.
113 |     
114 |     Returns:
115 |         GitHub repository URL (e.g., "https://github.com/owner/repo") or empty string if not available.
116 |     """
117 |     try:
118 |         repo_path = get_repo_path()
119 |         original_dir = os.getcwd()
120 |         try:
121 |             os.chdir(repo_path)
122 |             result = subprocess.run(
123 |                 ['git', 'remote', 'get-url', 'origin'],
124 |                 capture_output=True,
125 |                 text=True,
126 |                 timeout=5,
127 |                 check=False
128 |             )
129 |             os.chdir(original_dir)
130 |         except Exception:
131 |             os.chdir(original_dir)
132 |             return ""
133 |         
134 |         if result.returncode == 0 and result.stdout:
135 |             remote_url = result.stdout.strip()
136 |             # Convert SSH format (git@github.com:owner/repo.git) to HTTPS format
137 |             # or extract from HTTPS format (https://github.com/owner/repo.git)
138 |             ssh_pattern = r'git@github\.com:(.+?)(?:\.git)?$'
139 |             https_pattern = r'https?://(?:www\.)?github\.com/(.+?)(?:\.git)?$'
140 |             
141 |             ssh_match = re.match(ssh_pattern, remote_url)
142 |             if ssh_match:
143 |                 owner_repo = ssh_match.group(1)
144 |                 return f"https://github.com/{owner_repo}"
145 |             
146 |             https_match = re.match(https_pattern, remote_url)
147 |             if https_match:
148 |                 owner_repo = https_match.group(1)
149 |                 return f"https://github.com/{owner_repo}"
150 |             
151 |             # Return as-is if it doesn't match GitHub patterns
152 |             return remote_url
153 |         
154 |         return ""
155 |     except Exception:
156 |         return ""
157 | 
158 | def get_git_commit_hash() -> str:
159 |     """
160 |     Get the current git commit hash for the repository.
161 |     Uses APIMESH_USER_REPO_PATH environment variable to determine the repository path.
162 |     
163 |     Returns:
164 |         Git commit hash as a string, or empty string if not available.
165 |     """
166 |     try:
167 |         repo_path = get_repo_path()
168 |         # Change to repo directory for git command
169 |         original_dir = os.getcwd()
170 |         try:
171 |             os.chdir(repo_path)
172 |             result = subprocess.run(
173 |                 ['git', 'rev-parse', 'HEAD'],
174 |                 capture_output=True,
175 |                 text=True,
176 |                 timeout=5,
177 |                 check=False
178 |             )
179 |             os.chdir(original_dir)
180 |         except Exception:
181 |             os.chdir(original_dir)
182 |             return ""
183 |         
184 |         if result.returncode == 0 and result.stdout:
185 |             return result.stdout.strip()
186 |         return ""
187 |     except Exception:
188 |         return ""


--------------------------------------------------------------------------------
/rails_pipeline/generate_file_information.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Dict, List, Optional
  3 | 
  4 | from tree_sitter import Language, Parser
  5 | import tree_sitter_ruby
  6 | 
  7 | from config import Configurations
  8 | 
  9 | config = Configurations()
 10 | 
 11 | RUBY_LANGUAGE = Language(tree_sitter_ruby.language())
 12 | parser = Parser(RUBY_LANGUAGE)
 13 | 
 14 | 
 15 | def parse_file(filename: str):
 16 |     with open(filename, "r", encoding="utf-8") as f:
 17 |         code = f.read()
 18 |     tree = parser.parse(code.encode("utf-8"))
 19 |     return tree, code
 20 | 
 21 | 
 22 | def _node_text(source: str, node) -> str:
 23 |     return source[node.start_byte : node.end_byte]
 24 | 
 25 | 
 26 | def _gather_class_info(node, source: str) -> Dict:
 27 |     name_node = node.child_by_field_name("name")
 28 |     name = _node_text(source, name_node) if name_node else "<anonymous>"
 29 |     superclass_node = node.child_by_field_name("superclass")
 30 |     superclass = None
 31 |     if superclass_node:
 32 |         superclass = _node_text(source, superclass_node).strip()
 33 |         if superclass:
 34 |             superclass = superclass.lstrip("<").strip()
 35 |     return {
 36 |         "type": "class",
 37 |         "name": name,
 38 |         "start_line": node.start_point[0] + 1,
 39 |         "end_line": node.end_point[0] + 1,
 40 |         "superclass": superclass,
 41 |     }
 42 | 
 43 | 
 44 | def _gather_module_info(node, source: str) -> Dict:
 45 |     name_node = node.child_by_field_name("name")
 46 |     name = _node_text(source, name_node) if name_node else "<anonymous>"
 47 |     return {
 48 |         "type": "module",
 49 |         "name": name,
 50 |         "start_line": node.start_point[0] + 1,
 51 |         "end_line": node.end_point[0] + 1,
 52 |     }
 53 | 
 54 | 
 55 | def _gather_method_info(node, source: str) -> Dict:
 56 |     name_node = node.child_by_field_name("name")
 57 |     name = _node_text(source, name_node) if name_node else "<anonymous>"
 58 |     return {
 59 |         "type": "function",
 60 |         "name": name,
 61 |         "start_line": node.start_point[0] + 1,
 62 |         "end_line": node.end_point[0] + 1,
 63 |     }
 64 | 
 65 | 
 66 | def _gather_call_info(node, source: str) -> Dict:
 67 |     name_node = node.child_by_field_name("method")
 68 |     if not name_node:
 69 |         name_node = node.child_by_field_name("name")
 70 |     name = _node_text(source, name_node) if name_node else "<anonymous>"
 71 |     call_info = {
 72 |         "type": "function_call",
 73 |         "name": name,
 74 |         "start_line": node.start_point[0] + 1,
 75 |         "end_line": node.end_point[0] + 1,
 76 |     }
 77 | 
 78 |     definition_range = _infer_definition_range(node, source)
 79 |     if definition_range:
 80 |         call_info.update(definition_range)
 81 |     return call_info
 82 | 
 83 | 
 84 | def _infer_definition_range(node, source: str) -> Optional[Dict]:
 85 |     """
 86 |     Attempt to infer the definition range for an inline function call by
 87 |     locating the matching method definition within the same source buffer.
 88 |     """
 89 |     name_node = node.child_by_field_name("method")
 90 |     if not name_node:
 91 |         name_node = node.child_by_field_name("name")
 92 |     if not name_node:
 93 |         return None
 94 | 
 95 |     name = _node_text(source, name_node)
 96 |     # This heuristic scans siblings in the same scope looking for `def name`.
 97 |     parent = node.parent
 98 |     while parent is not None:
 99 |         for sibling in parent.children:
100 |             if sibling.type in {"method", "singleton_method"}:
101 |                 method_name_node = sibling.child_by_field_name("name")
102 |                 if method_name_node and _node_text(source, method_name_node) == name:
103 |                     return {
104 |                         "function_start_line": sibling.start_point[0] + 1,
105 |                         "function_end_line": sibling.end_point[0] + 1,
106 |                     }
107 |         parent = parent.parent
108 |     return None
109 | 
110 | 
111 | def _gather_import_info(node, source: str, base_directory: str) -> Optional[Dict]:
112 |     method_node = node.child_by_field_name("method")
113 |     if not method_node:
114 |         return None
115 | 
116 |     method_name = _node_text(source, method_node)
117 |     if method_name not in {"require", "require_relative"}:
118 |         return None
119 | 
120 |     arguments_node = node.child_by_field_name("arguments")
121 |     if arguments_node is None or len(arguments_node.children) == 0:
122 |         return None
123 | 
124 |     literal = None
125 |     for child in arguments_node.children:
126 |         if child.type == "string":
127 |             content = child.child_by_field_name("content")
128 |             if content:
129 |                 literal = _node_text(source, content)
130 |                 break
131 |         if child.type == "symbol_literal":
132 |             sym = child.child_by_field_name("name")
133 |             if sym:
134 |                 literal = _node_text(source, sym)
135 |                 break
136 | 
137 |     if literal is None:
138 |         return None
139 | 
140 |     origin = _resolve_required_path(
141 |         literal, base_directory, method_name == "require_relative"
142 |     )
143 | 
144 |     return {
145 |         "type": "import",
146 |         "imported_name": literal,
147 |         "from_module": literal,
148 |         "origin": origin,
149 |         "line": node.start_point[0] + 1,
150 |         "path_exists": origin is not None and os.path.exists(origin),
151 |         "usage_lines": [],
152 |     }
153 | 
154 | 
155 | def _resolve_required_path(
156 |     literal: str, base_directory: str, is_relative: bool
157 | ) -> Optional[str]:
158 |     if is_relative:
159 |         candidate = os.path.normpath(os.path.join(base_directory, f"{literal}.rb"))
160 |         if os.path.exists(candidate):
161 |             return candidate
162 |     else:
163 |         candidate = os.path.join(base_directory, f"{literal}.rb")
164 |         if os.path.exists(candidate):
165 |             return os.path.normpath(candidate)
166 |     return None
167 | 
168 | 
169 | def get_elements(tree, source: str, base_directory: str) -> Dict:
170 |     elements = {
171 |         "classes": [],
172 |         "modules": [],
173 |         "functions": [],
174 |         "function_calls": [],
175 |     }
176 |     imports: List[Dict] = []
177 | 
178 |     cursor = [tree.root_node]
179 |     while cursor:
180 |         node = cursor.pop()
181 |         node_type = node.type
182 |         if node_type == "class":
183 |             elements["classes"].append(_gather_class_info(node, source))
184 |         elif node_type == "module":
185 |             elements["modules"].append(_gather_module_info(node, source))
186 |         elif node_type in {"method", "singleton_method"}:
187 |             elements["functions"].append(_gather_method_info(node, source))
188 |         elif node_type in {"call", "command", "command_call"}:
189 |             elements["function_calls"].append(_gather_call_info(node, source))
190 | 
191 |             import_info = _gather_import_info(node, source, base_directory)
192 |             if import_info:
193 |                 imports.append(import_info)
194 | 
195 |         cursor.extend(list(node.children))
196 | 
197 |     return elements, imports
198 | 
199 | 
200 | def process_file(filename: str, base_directory: Optional[str] = None) -> Dict:
201 |     if not base_directory:
202 |         base_directory = os.path.dirname(filename)
203 | 
204 |     tree, code = parse_file(filename)
205 |     elements, imports = get_elements(tree, code, base_directory)
206 |     return {"filename": filename, "elements": elements, "imports": imports}
207 | 


--------------------------------------------------------------------------------
/swagger_generation_cli.py:
--------------------------------------------------------------------------------
  1 | import traceback
  2 | import os
  3 | 
  4 | from user_config import UserConfigurations
  5 | from swagger_generator import SwaggerGeneration
  6 | from file_scanner import FileScanner
  7 | from framework_identifier import FrameworkIdentifier
  8 | from endpoints_extractor import EndpointsExtractor
  9 | from faiss_index_generator import GenerateFaissIndex
 10 | from nodejs_pipeline.run_swagger_generation import run_swagger_generation as nodejs_swagger_generator
 11 | from python_pipeline.run_swagger_generation import run_swagger_generation as python_swagger_generator
 12 | from rails_pipeline.run_swagger_generation import run_swagger_generation as ruby_on_rails_swagger_generator
 13 | from golang_pipeline.run_swagger_generation import run_swagger_generation as golang_swagger_generator
 14 | from utils import get_output_filepath
 15 | import requests, json
 16 | import sys
 17 | 
 18 | class RunSwagger:
 19 |     def __init__(self, project_api_key, openai_api_key, ai_chat_id, is_mcp):
 20 |         self.ai_chat_id = ai_chat_id
 21 |         self.user_configurations = UserConfigurations(project_api_key, openai_api_key, ai_chat_id, is_mcp)
 22 |         self.user_config = self.user_configurations.load_user_config()
 23 |         self.framework_identifier = FrameworkIdentifier()
 24 |         self.file_scanner = FileScanner()
 25 |         self.endpoints_extractor = EndpointsExtractor()
 26 |         self.faiss_index = GenerateFaissIndex()
 27 |         self.swagger_generator = SwaggerGeneration()
 28 | 
 29 | 
 30 |     def run_python_nodejs_ruby(self, framework):
 31 |         swagger = None
 32 |         try:
 33 |             if framework == "django" or framework == "flask" or framework == "fastapi":
 34 |                 swagger = python_swagger_generator(self.user_config['api_host'])
 35 |             elif framework == "express" or framework == "nestjs":
 36 |                 swagger = nodejs_swagger_generator(self.user_config['api_host'])
 37 |             elif framework == "ruby_on_rails":
 38 |                 swagger = ruby_on_rails_swagger_generator(self.user_config['api_host'])
 39 |             elif framework == "golang":
 40 |                 swagger = golang_swagger_generator(self.user_config['api_host'])
 41 |         except Exception as ex:
 42 |             traceback.print_exc()
 43 |             print("Fallback to old procedure")
 44 |         return swagger
 45 | 
 46 |     def _resolve_ai_chat_id(self, ai_chat_id):
 47 |         candidate = (ai_chat_id or "").strip()
 48 |         if candidate and candidate.lower() != "null":
 49 |             return candidate
 50 |         return self.user_config.get("ai_chat_id", "null")
 51 | 
 52 |     def run(self, ai_chat_id=None):
 53 |         resolved_ai_chat_id = self._resolve_ai_chat_id(ai_chat_id if ai_chat_id is not None else self.ai_chat_id)
 54 |         try:
 55 |             file_paths = self.file_scanner.get_all_file_paths()
 56 |             print("\n***************************************************")
 57 |             if self.user_config.get('framework', None):
 58 |                 print(f"Using Existing Framework - {self.user_config['framework']}")
 59 |                 framework =  self.user_config.get('framework', "")
 60 |             else:
 61 |                 print("Started framework identification")
 62 |                 framework = self.framework_identifier.get_framework(file_paths)['framework']
 63 |                 self.user_config['framework'] = framework
 64 |                 self.user_configurations.save_user_config(self.user_config)
 65 |         except Exception as ex:
 66 |             msg = str(ex)
 67 |             lowered = msg.lower()
 68 |             if "insufficient_quota" in lowered or "quota" in lowered:
 69 |                 print("OpenAI quota exceeded. Please check your plan/billing and retry after adding credits.")
 70 |             else:
 71 |                 print("We do not support this framework currently. Please contact QodexAI support.")
 72 |             exit()
 73 |         print(f"completed framework identification - {framework}")
 74 |         print("\n***************************************************")
 75 |         print("Started finding files related to API information")
 76 |         try:
 77 |             swagger = self.run_python_nodejs_ruby(framework)
 78 |             if swagger:
 79 |                 output_filepath = get_output_filepath()
 80 |                 self.swagger_generator.save_swagger_json(swagger, output_filepath)
 81 |                 #self.upload_swagger_to_qodex(resolved_ai_chat_id)
 82 |                 exit()
 83 |             api_files = self.file_scanner.find_api_files(file_paths, framework)
 84 |             print("Completed finding files related to API information")
 85 |             all_endpoints = []
 86 |             for filePath in api_files:
 87 |                 endpoints = self.endpoints_extractor.extract_endpoints_with_gpt(filePath, framework)
 88 |                 all_endpoints.extend(endpoints)
 89 |             print("\n***************************************************")
 90 |             print("Started creating faiss index for all files")
 91 |             faiss_vector = self.faiss_index.create_faiss_index(file_paths, framework)
 92 |             print("Completed creating faiss index for all files")
 93 |             print("Fetching authentication related information")
 94 |             authentication_information = self.faiss_index.get_authentication_related_information(faiss_vector)
 95 |             print("Completed Fetching authentication related information")
 96 |             endpoint_related_information = self.endpoints_extractor.get_endpoint_related_information(faiss_vector, all_endpoints)
 97 |             swagger = self.swagger_generator.create_swagger_json(endpoint_related_information, authentication_information, framework, self.user_config['api_host'])
 98 |         except Exception as ex:
 99 |             traceback.print_exc()
100 |             print("Oops! looks like we encountered an issue. Please try after some time.")
101 |             exit()
102 |         try:
103 |             output_filepath = get_output_filepath()
104 |             self.swagger_generator.save_swagger_json(swagger, output_filepath)
105 |         except Exception as ex:
106 |             print("Swagger was not able to be saved. Please check your project api key and try again.")
107 |         #self.upload_swagger_to_qodex(resolved_ai_chat_id)
108 |         return
109 | 
110 | 
111 |     def upload_swagger_to_qodex(self, ai_chat_id):
112 |         qodex_api_key = self.user_config['qodex_api_key']
113 |         if qodex_api_key:
114 |             print("Uploading swagger to Qodex.AI")
115 |             url = "https://api.app.qodex.ai/api/v1/collection_imports/create_with_json"
116 |             output_filepath = get_output_filepath()
117 |             with open(output_filepath, "r") as file:
118 |                 swagger_doc = json.load(file)
119 |             payload = {
120 |                 "api_key": qodex_api_key,
121 |                 "swagger_doc": swagger_doc,
122 |                 "ai_chat_id": ai_chat_id
123 |             }
124 |             response = requests.post(url, json=payload)
125 | 
126 |             # Check the response
127 |             if response.status_code == 200 or response.status_code == 201:
128 |                 print("Success:", response.json())  # Or response.text for plain text responses
129 |                 print("Swagger successfully uploaded to Qodex AI. Please refresh your tab.")
130 |                 print("We highly recommend you to review the apis before generating test scenarios.")
131 |                 if str(ai_chat_id) != 'null':
132 |                     print("Open the following link in your browser or refresh the existing open page to continue further")
133 |                     print(f"https://app.qodex.ai/ai-agent?chatId={ai_chat_id}")
134 |             else:
135 |                 print(f"Failed with status code {response.status_code}: {response.text}")
136 |         return
137 | 
138 | 
139 | openai_api_key = sys.argv[1] if len(sys.argv) > 1 else ""
140 | project_api_key = sys.argv[2] if len(sys.argv) > 2 else ""
141 | ai_chat_id = sys.argv[3] if len(sys.argv) > 3 else ""
142 | is_mcp = sys.argv[4] if len(sys.argv) > 4 else False
143 | 
144 | RunSwagger(project_api_key, openai_api_key, ai_chat_id, is_mcp).run(ai_chat_id)
145 | 


--------------------------------------------------------------------------------
/python_pipeline/run_swagger_generation.py:
--------------------------------------------------------------------------------
  1 | import os, json, ast
  2 | import shutil
  3 | import datetime
  4 | from pathlib import Path
  5 | from python_pipeline.generate_file_information import process_file
  6 | from python_pipeline.find_api_definition_files import find_api_definition_files
  7 | from python_pipeline.identify_api_functions import set_parents, find_api_endpoints
  8 | from config import Configurations
  9 | from python_pipeline.definition_swagger_generator import get_function_definition_swagger
 10 | from utils import get_git_commit_hash, get_github_repo_url, get_repo_path, get_repo_name
 11 | 
 12 | config = Configurations()
 13 | 
 14 | 
 15 | def should_process_directory(dir_path: str) -> bool:
 16 |     """
 17 |     Check if a directory should be processed or ignored
 18 |     """
 19 |     path_parts = dir_path.split(os.sep)
 20 |     return not any(part in config.ignored_dirs for part in path_parts)
 21 | 
 22 | def run_swagger_generation(host):
 23 |     directory_path = get_repo_path()
 24 |     repo_name = get_repo_name()
 25 |     new_dir_name = "qodex_file_information"
 26 |     new_dir_path = os.path.join(directory_path, new_dir_name)
 27 |     os.makedirs(new_dir_path, exist_ok=True)
 28 |     for root, dirs, files in os.walk(directory_path):
 29 |         for file in files:
 30 |             file_path = os.path.join(root, file)
 31 |             if os.path.exists(file_path) and should_process_directory(str(file_path)) and file_path.endswith(".py"):
 32 |                 file_info = process_file(file_path, directory_path)
 33 |                 json_file_name = new_dir_path +"/"+ str(file_path).replace("/", "_q_").strip(".py") + ".json"
 34 |                 with open(json_file_name, "w") as f:
 35 |                     json.dump(file_info, f, indent=4)
 36 |     api_definition_files = find_api_definition_files(directory_path)
 37 |     all_endpoints_dict = dict()
 38 |     for file in api_definition_files:
 39 |         all_endpoints = []
 40 |         py_file = Path(file)
 41 |         source = py_file.read_text(encoding="utf-8")
 42 |         tree = ast.parse(source)
 43 |         set_parents(tree)
 44 |         eps = find_api_endpoints(py_file)
 45 |         if eps:
 46 |             all_endpoints.extend(eps)
 47 |             all_endpoints_dict[file] = all_endpoints
 48 |     swagger = {
 49 |             "openapi": "3.0.0",
 50 |             "info": {
 51 |                 "title": repo_name,
 52 |                 "version": "1.0.0",
 53 |                 "description": "This Swagger file was generated using OpenAI GPT.",
 54 |                 "generated_at": datetime.datetime.utcnow().isoformat() + "Z",
 55 |                 "commit_reference": get_git_commit_hash(),
 56 |                 "github_repo_url": get_github_repo_url()
 57 |             },
 58 |             "servers": [
 59 |                 {
 60 |                     "url": host
 61 |                 }
 62 |             ],
 63 |             "paths": {}
 64 |         }
 65 |     for key, value in all_endpoints_dict.items():
 66 |         for item in value:
 67 |             if item['type'] == 'class':
 68 |                 if item['methods']:
 69 |                     for item1 in item['methods']:
 70 |                         context_code_blocks, method_definition_code_block = provide_context_codeblock(directory_path, item1)
 71 |                         swagger_for_def = get_function_definition_swagger(method_definition_code_block, context_code_blocks, item1['route'])
 72 |                         key = list(swagger_for_def['paths'].keys())[0]
 73 |                         if key not in swagger["paths"]:
 74 |                             swagger["paths"][key] = {}
 75 |                         _method_list = list(swagger_for_def['paths'][key].keys())
 76 |                         if not _method_list:
 77 |                             continue
 78 |                         _method = _method_list[0]
 79 |                         swagger["paths"][key][_method] = swagger_for_def['paths'][key][_method]
 80 |             else:
 81 |                 context_code_blocks, method_definition_code_block = provide_context_codeblock(directory_path,item)
 82 |                 swagger_for_def = get_function_definition_swagger(method_definition_code_block, context_code_blocks, item['route'])
 83 |                 key = list(swagger_for_def['paths'].keys())[0]
 84 |                 if key not in swagger["paths"]:
 85 |                     swagger["paths"][key] = {}
 86 |                 _method_list = list(swagger_for_def['paths'][key].keys())
 87 |                 if not _method_list:
 88 |                     continue
 89 |                 _method = _method_list[0]
 90 |                 swagger["paths"][key][_method] = swagger_for_def['paths'][key][_method]
 91 |     shutil.rmtree(new_dir_path)
 92 |     return swagger
 93 | 
 94 | 
 95 | def get_dependencies(data, start_line, end_line, file_path):
 96 |     existing_function_names = [item['name'] for item in data['elements']['functions'] if item['name'] not in ['get', 'post', 'put', 'delete', 'patch']]
 97 |     in_file_dependency_functions = []
 98 |     for item in data['elements']['function_calls']:
 99 |         if (item['name'] in existing_function_names) and item['start_line'] >= start_line and item['end_line'] <= end_line:
100 |             item['file_path'] = file_path
101 |             in_file_dependency_functions.append(item)
102 |     imported_functions = []
103 |     for item in data['imports']:
104 |         if not item['path_exists']:
105 |             continue
106 |         for k in item['usage_lines']:
107 |             if start_line<=k<=end_line:
108 |                 imported_functions.append(item)
109 |             if in_file_dependency_functions:
110 |                 for item1 in in_file_dependency_functions:
111 |                     if item1['start_line'] <= k <= item1['end_line'] and item not in imported_functions:
112 |                         imported_functions.append(item)
113 |     return in_file_dependency_functions, imported_functions
114 | 
115 | def get_code_blocks(in_file_dependency_functions, imported_functions, file_name, directory_path):
116 |     code_blocks = []
117 |     for block in in_file_dependency_functions:
118 |         with open(file_name, "r") as f:
119 |             lines = f.readlines()
120 |             f.close()
121 |         code_blocks.append(lines[block['function_start_line'] - 1 : block['function_start_line']])
122 |     for func in imported_functions:
123 |         visited = False
124 |         file_name = func['origin']
125 |         json_dir_path = directory_path + "/" + "qodex_file_information"
126 |         json_file = str(file_name).replace("/", "_q_").strip(".py") + ".json"
127 |         complete_json_file_path = json_dir_path + "/" + json_file
128 |         with open(complete_json_file_path, "r") as f:
129 |             data = json.load(f)
130 |             f.close()
131 |         for item in data['elements']['classes']:
132 |             if item['name'] == func['imported_name']:
133 |                 visited = True
134 |                 with open(file_name, "r") as f:
135 |                     lines = f.readlines()
136 |                     f.close()
137 |                 code_blocks.append(lines[item['start_line']-1: item['end_line']])
138 |                 break
139 |         if not visited:
140 |             for item in data['elements']['functions']:
141 |                 if item['name'] == func['imported_name']:
142 |                     visited = True
143 |                     with open(file_name, "r") as f:
144 |                         lines = f.readlines()
145 |                         f.close()
146 |                     code_blocks.append(lines[item['start_line'] - 1: item['end_line']])
147 |                     break
148 |         if not visited:
149 |             for item in data['elements']['variables']:
150 |                 if item['name'] == func['imported_name']:
151 |                     with open(file_name, "r") as f:
152 |                         lines = f.readlines()
153 |                         f.close()
154 |                     code_blocks.append(lines[item['start_line'] - 1: item['end_line']])
155 |                     break
156 |     return code_blocks
157 | 
158 | 
159 | def provide_context_codeblock(directory_path, method_info):
160 |     file_name = method_info['file_path']
161 |     with open(method_info['file_path'], "r") as f:
162 |         lines = f.readlines()
163 |     method_definition_code_block = lines[method_info["start_line"]-1: method_info["end_line"]]
164 |     json_dir_path = directory_path + "/" + "qodex_file_information"
165 |     json_file = str(file_name).replace("/", "_q_").strip(".py") + ".json"
166 |     complete_json_file_path = json_dir_path + "/" + json_file
167 |     with open(complete_json_file_path, "r") as f:
168 |         data = json.load(f)
169 |     in_file_dependency_functions, imported_functions = get_dependencies(data, method_info["start_line"], method_info["end_line"], method_info['file_path'])
170 |     context_code_blocks = get_code_blocks(in_file_dependency_functions, imported_functions, file_name, directory_path)
171 |     return context_code_blocks, method_definition_code_block
172 | 
173 | 


--------------------------------------------------------------------------------
/nodejs_pipeline/generate_file_information.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | from tree_sitter import Language, Parser, QueryCursor
  3 | import tree_sitter_javascript
  4 | import tree_sitter_typescript
  5 | import os
  6 | import json
  7 | from nodejs_pipeline.constants import (
  8 |     TYPESCRIPT_FILE_EXTENSIONS,
  9 |     TSX_FILE_EXTENSIONS,
 10 | )
 11 | 
 12 | # Load JavaScript grammar
 13 | JS_LANGUAGE = Language(tree_sitter_javascript.language())
 14 | TS_LANGUAGE = Language(tree_sitter_typescript.language_typescript())
 15 | TSX_LANGUAGE = Language(tree_sitter_typescript.language_tsx())
 16 | 
 17 | 
 18 | def _get_language_for_file(filename: str) -> Language:
 19 |     suffix = Path(filename).suffix.lower()
 20 |     if suffix in TSX_FILE_EXTENSIONS and TSX_LANGUAGE:
 21 |         return TSX_LANGUAGE
 22 |     if suffix in TYPESCRIPT_FILE_EXTENSIONS and TS_LANGUAGE:
 23 |         return TS_LANGUAGE
 24 |     return JS_LANGUAGE
 25 | 
 26 | def parse_file(filename):
 27 |     with open(filename, 'r', encoding='utf-8') as f:
 28 |         code = f.read()
 29 |     language = _get_language_for_file(filename)
 30 |     parser = Parser(language)
 31 |     tree = parser.parse(code.encode('utf-8'))
 32 |     return tree, code, language
 33 | 
 34 | 
 35 | def _parse_with_language(code: str, language: Language):
 36 |     parser = Parser(language)
 37 |     return parser.parse(code.encode('utf-8'))
 38 | 
 39 | def get_module_origin(module_name, base_directory):
 40 |     """
 41 |     Resolve JS import/require origin similar to Node.js module resolution.
 42 |     """
 43 |     # Relative import
 44 |     if module_name.startswith("."):
 45 |         path = os.path.normpath(os.path.join(base_directory, module_name))
 46 |         search_exts = [
 47 |             ".ts",
 48 |             ".tsx",
 49 |             ".cts",
 50 |             ".mts",
 51 |             ".js",
 52 |             ".mjs",
 53 |             ".cjs",
 54 |             ".d.ts",
 55 |             "/index.ts",
 56 |             "/index.tsx",
 57 |             "/index.cts",
 58 |             "/index.mts",
 59 |             "/index.js",
 60 |             "/index.mjs",
 61 |             "/index.cjs",
 62 |         ]
 63 |         for ext in search_exts:
 64 |             candidate = path + ext
 65 |             if os.path.exists(candidate):
 66 |                 return os.path.abspath(candidate)
 67 |         return None
 68 | 
 69 |     # Look in node_modules
 70 |     node_module_path = os.path.join(base_directory, "node_modules", module_name)
 71 |     if os.path.exists(node_module_path):
 72 |         return os.path.abspath(node_module_path)
 73 | 
 74 |     return "<node_builtin_or_external>"
 75 | 
 76 | def find_import_usages(tree, imported_names, language):
 77 |     """Find where imported identifiers are used."""
 78 |     query = language.query("""
 79 |         (identifier) @ident
 80 |     """)
 81 |     cursor = QueryCursor(query)
 82 |     captures = cursor.captures(tree.root_node)
 83 | 
 84 |     usages = {name: [] for name in imported_names}
 85 |     for node in captures.get("ident", []):
 86 |         name = node.text.decode("utf-8")
 87 |         if name in imported_names:
 88 |             line = node.start_point[0] + 1
 89 |             if line not in usages[name]:
 90 |                 usages[name].append(line)
 91 |     return usages
 92 | 
 93 | def get_elements(tree, code, base_directory, language):
 94 |     """
 95 |     Extract classes, functions, variables, function calls, imports.
 96 |     """
 97 |     query = language.query("""
 98 |         (class_declaration
 99 |             name: (identifier) @class-name) @class
100 | 
101 |         (function_declaration
102 |             name: (identifier) @func-name) @function
103 | 
104 |         (variable_declarator
105 |             name: (identifier) @var-name) @variable
106 | 
107 |         (call_expression
108 |             function: (identifier) @called-func) @func-call
109 | 
110 |         (call_expression
111 |             function: (member_expression
112 |                 property: (property_identifier) @method-name)) @method-call
113 | 
114 |         ; ES6 imports
115 |         (import_statement
116 |             (import_clause (identifier) @imported-symbol)?
117 |             source: (string) @import-source)
118 | 
119 |         ; CommonJS require
120 |         (variable_declarator
121 |             name: (identifier) @var-name
122 |             value: (call_expression
123 |                 function: (identifier) @require-func
124 |                 arguments: (arguments (string) @require-source)
125 |             )
126 |         )
127 |     """)
128 | 
129 |     cursor = QueryCursor(query)
130 |     captures = cursor.captures(tree.root_node)
131 | 
132 |     elements = {
133 |         'classes': [],
134 |         'functions': [],
135 |         'variables': [],
136 |         'function_calls': [],
137 |         'imports': []
138 |     }
139 | 
140 |     imported_names = set()
141 | 
142 |     # Collect symbols
143 |     for node in captures.get("func-name", []):
144 |         elements['functions'].append({
145 |             'type': 'function',
146 |             'name': node.text.decode(),
147 |             'line': node.start_point[0] + 1,
148 |             'start_line': node.start_point[0] + 1,
149 |             'end_line': node.end_point[0]+1
150 |         })
151 | 
152 |     for node in captures.get("class-name", []):
153 |         elements['classes'].append({
154 |             'type': 'class',
155 |             'name': node.text.decode(),
156 |             'line': node.start_point[0] + 1,
157 |             'start_line': node.start_point[0] + 1,
158 |             'end_line': node.end_point[0]+1
159 |         })
160 | 
161 |     for node in captures.get("var-name", []):
162 |         elements['variables'].append({
163 |             'type': 'variable',
164 |             'name': node.text.decode(),
165 |             'line': node.start_point[0] + 1,
166 |             'start_line': node.start_point[0] + 1,
167 |             'end_line': node.end_point[0]+1
168 |         })
169 | 
170 |     for node in captures.get("called-func", []):
171 |         elements['function_calls'].append({
172 |             'type': 'function_call',
173 |             'name': node.text.decode(),
174 |             'line': node.start_point[0] + 1,
175 |             'start_line': node.start_point[0] + 1,
176 |             'end_line': node.end_point[0]+1
177 |         })
178 | 
179 |     for node in captures.get("method-name", []):
180 |         elements['function_calls'].append({
181 |             'type': 'method_call',
182 |             'name': node.text.decode(),
183 |             'line': node.start_point[0] + 1,
184 |             'start_line': node.start_point[0] + 1,
185 |             'end_line': node.end_point[0]+1
186 |         })
187 | 
188 |     # Handle imports
189 |     sources = captures.get("import-source", []) + captures.get("require-source", [])
190 |     imported_symbols = captures.get("imported-symbol", []) + captures.get("var-name", [])  # align require names
191 | 
192 |     for i, source_node in enumerate(sources):
193 |         module_name = source_node.text.decode().strip('"\'')
194 |         origin = get_module_origin(module_name, base_directory)
195 |         imported_name = None
196 |         if i < len(imported_symbols):
197 |             imported_name = imported_symbols[i].text.decode()
198 |             imported_names.add(imported_name)
199 | 
200 |         elements['imports'].append({
201 |             'type': 'import',
202 |             'imported_name': imported_name if imported_name else "require",
203 |             'from_module': module_name,
204 |             'origin': origin,
205 |             'line': source_node.start_point[0] + 1,
206 |             'path_exists': os.path.exists(origin) if origin and origin != "<node_builtin_or_external>" else False,
207 |             'usage_lines': []
208 |         })
209 | 
210 |     # Find import usages
211 |     if imported_names:
212 |         usages = find_import_usages(tree, imported_names, language)
213 |         for imp in elements['imports']:
214 |             name = imp['imported_name']
215 |             if name and name in usages:
216 |                 imp['usage_lines'] = list(set(usages[name]) - {imp['line']})
217 | 
218 |     return elements
219 | 
220 | def process_file(filename, base_directory=None):
221 |     if not base_directory:
222 |         base_directory = os.path.dirname(filename)
223 | 
224 |     tree, code, language = parse_file(filename)
225 |     try:
226 |         elements = get_elements(tree, code, base_directory, language)
227 |     except Exception as ex:
228 |         suffix = Path(filename).suffix.lower()
229 |         if suffix in TYPESCRIPT_FILE_EXTENSIONS or suffix in TSX_FILE_EXTENSIONS:
230 |             # Fallback: try parsing with JS grammar to salvage metadata for TS/TSX files that break the TS query
231 |             fallback_tree = _parse_with_language(code, JS_LANGUAGE)
232 |             elements = get_elements(fallback_tree, code, base_directory, JS_LANGUAGE)
233 |         else:
234 |             raise
235 | 
236 |     return {
237 |         'filename': filename,
238 |         'elements': elements
239 |     }
240 | 
241 | if __name__ == "__main__":
242 |     filename = "/Users/ankits/My-Favourite-Playlist/server.js"
243 |     base_directory = "/Users/ankits/My-Favourite-Playlist"
244 |     if os.path.exists(filename):
245 |         result = process_file(filename, base_directory)
246 |         print(json.dumps(result, indent=2))
247 |     else:
248 |         print(f"File {filename} not found")
249 | 


--------------------------------------------------------------------------------
/python_pipeline/generate_file_information.py:
--------------------------------------------------------------------------------
  1 | from tree_sitter import Language, Parser, QueryCursor
  2 | import tree_sitter_python
  3 | import ast
  4 | import importlib.util
  5 | import os
  6 | import sys
  7 | from config import Configurations
  8 | 
  9 | config = Configurations()
 10 | 
 11 | PY_LANGUAGE = Language(tree_sitter_python.language())
 12 | 
 13 | parser = Parser(PY_LANGUAGE)
 14 | 
 15 | 
 16 | def parse_file(filename):
 17 |     with open(filename, 'r', encoding='utf-8') as f:
 18 |         code = f.read()
 19 |     tree = parser.parse(code.encode('utf-8'))
 20 |     return tree, code
 21 | 
 22 | 
 23 | def get_module_origin(module_name, base_directory=None):
 24 |     try:
 25 |         original_path = sys.path.copy()
 26 |         if base_directory and base_directory not in sys.path:
 27 |             sys.path.insert(0, base_directory)
 28 | 
 29 |         spec = importlib.util.find_spec(module_name)
 30 |         if spec and spec.origin:
 31 |             return spec.origin
 32 |         elif spec is None:
 33 |             return "<built-in>"
 34 |     except Exception:
 35 |         pass
 36 |     finally:
 37 |         sys.path = original_path
 38 | 
 39 |     if base_directory and "." in module_name:
 40 |         parts = module_name.split(".")
 41 |         potential_path = os.path.join(base_directory, *parts)
 42 |         for ext in (".py", "/__init__.py"):
 43 |             candidate = potential_path + ext
 44 |             if os.path.exists(candidate):
 45 |                 return candidate
 46 |     return None
 47 | 
 48 | 
 49 | def find_import_usages(tree, imported_names):
 50 |     """Find lines where imported names are used in the code."""
 51 |     query = PY_LANGUAGE.query("""
 52 |         (identifier) @ident
 53 |     """)
 54 | 
 55 |     cursor = QueryCursor(query)
 56 |     captures = cursor.captures(tree.root_node)
 57 | 
 58 |     usages = {name: [] for name in imported_names}
 59 | 
 60 |     for node in captures.get("ident", []):
 61 |         name = node.text.decode("utf-8")
 62 |         if name in imported_names:
 63 |             line = node.start_point[0] + 1
 64 |             if line not in usages[name]:  # Avoid duplicates
 65 |                 usages[name].append(line)
 66 | 
 67 |     return usages
 68 | 
 69 | 
 70 | def analyze_imports(filepath, base_directory=None, tree=None):
 71 |     imports = []
 72 |     imported_names = set()  # Track imported names for usage lookup
 73 |     try:
 74 |         with open(filepath, "r", encoding="utf-8") as f:
 75 |             source = f.read()
 76 |             tree_ast = ast.parse(source, filename=filepath)
 77 | 
 78 |         for node in ast.walk(tree_ast):
 79 |             if isinstance(node, ast.ImportFrom):
 80 |                 module = node.module
 81 |                 if module is None:
 82 |                     continue  # skip relative imports
 83 |                 origin = get_module_origin(module, base_directory)
 84 |                 for alias in node.names:
 85 |                     name = alias.asname if alias.asname else alias.name
 86 |                     imported_names.add(name)
 87 |                     imports.append({
 88 |                         'type': 'import',
 89 |                         'imported_name': alias.name,
 90 |                         'from_module': module,
 91 |                         'origin': origin,
 92 |                         'line': node.lineno,
 93 |                         'path_exists': False,  # Will be updated later
 94 |                         'usage_lines': []  # Will be populated later
 95 |                     })
 96 |             elif isinstance(node, ast.Import):
 97 |                 for alias in node.names:
 98 |                     name = alias.asname if alias.asname else alias.name
 99 |                     imported_names.add(name)
100 |                     origin = get_module_origin(alias.name, base_directory)
101 |                     imports.append({
102 |                         'type': 'import',
103 |                         'imported_name': alias.name,
104 |                         'from_module': None,
105 |                         'origin': origin,
106 |                         'line': node.lineno,
107 |                         'path_exists': False,  # Will be updated later
108 |                         'usage_lines': []  # Will be populated later
109 |                     })
110 |     except Exception as e:
111 |         print(f"Error analyzing imports in {filepath}: {str(e)}")
112 | 
113 |     # Find where imported names are used
114 |     if tree and imported_names:
115 |         usages = find_import_usages(tree, imported_names)
116 |         for import_item in imports:
117 |             name = import_item['imported_name']
118 |             if import_item.get('asname'):
119 |                 name = import_item['asname']
120 |             import_item['usage_lines'] = list(set(usages.get(name, [])) - set([import_item['line']]))
121 | 
122 |     return imports
123 | 
124 | 
125 | def get_elements(tree):
126 |     query = PY_LANGUAGE.query("""
127 |         (class_definition
128 |             name: (identifier) @class-name) @class
129 |         (function_definition
130 |             name: (identifier) @func-name) @function
131 |         (assignment
132 |             left: (identifier) @var-name) @variable
133 |         (call
134 |             function: (identifier) @called-func) @func-call
135 |         (call
136 |             function: (attribute
137 |                 attribute: (identifier) @method-name)) @method-call
138 |         (import_statement
139 |             name: (dotted_name (identifier) @imported-func))
140 |         (import_from_statement
141 |             name: (dotted_name (identifier) @imported-func))
142 |     """)
143 | 
144 |     cursor = QueryCursor(query)
145 |     captures = cursor.captures(tree.root_node)
146 | 
147 |     elements = {
148 |         'classes': [],
149 |         'functions': [],
150 |         'variables': [],
151 |         'function_calls': [],
152 |     }
153 | 
154 |     # Collect function definitions for cross-referencing
155 |     function_defs = {}
156 |     for node in captures.get("func-name", []):
157 |         func_name = node.text.decode("utf-8")
158 |         elements['functions'].append({
159 |             'type': 'function',
160 |             'name': func_name,
161 |             'start_line': node.start_point[0] + 1,
162 |             'end_line': node.parent.end_point[0] + 1
163 |         })
164 |         function_defs[func_name] = {
165 |             'start_line': node.start_point[0] + 1,
166 |             'end_line': node.parent.end_point[0] + 1
167 |         }
168 | 
169 |     for node in captures.get("class-name", []):
170 |         elements['classes'].append({
171 |             'type': 'class',
172 |             'name': node.text.decode("utf-8"),
173 |             'start_line': node.start_point[0] + 1,
174 |             'end_line': node.parent.end_point[0] + 1
175 |         })
176 | 
177 |     for node in captures.get("var-name", []):
178 |         elements['variables'].append({
179 |             'type': 'variable',
180 |             'name': node.text.decode("utf-8"),
181 |             'start_line': node.start_point[0] + 1,
182 |             'end_line': node.parent.end_point[0] + 1
183 |         })
184 | 
185 |     for node in captures.get("called-func", []):
186 |         func_name = node.text.decode("utf-8")
187 |         call_info = {
188 |             'type': 'function_call',
189 |             'name': func_name,
190 |             'start_line': node.start_point[0] + 1,
191 |             'end_line': node.parent.end_point[0] + 1
192 |         }
193 |         if func_name in function_defs:
194 |             call_info['function_start_line'] = function_defs[func_name]['start_line']
195 |             call_info['function_end_line'] = function_defs[func_name]['end_line']
196 |         elements['function_calls'].append(call_info)
197 | 
198 |     for node in captures.get("method-name", []):
199 |         method_name = node.text.decode("utf-8")
200 |         call_info = {
201 |             'type': 'function_call',
202 |             'name': method_name,
203 |             'start_line': node.start_point[0] + 1,
204 |             'end_line': node.parent.end_point[0] + 1
205 |         }
206 |         if method_name in function_defs:
207 |             call_info['function_start_line'] = function_defs[method_name]['start_line']
208 |             call_info['function_end_line'] = function_defs[method_name]['end_line']
209 |         elements['function_calls'].append(call_info)
210 |     return elements
211 | 
212 | 
213 | def check_path_exists(imports, base_directory):
214 |     for import_item in imports:
215 |         origin = import_item.get('origin')
216 |         if origin and origin != "<built-in>" and os.path.isabs(origin):
217 |             try:
218 |                 origin = os.path.normpath(origin)
219 |                 base_directory = os.path.normpath(base_directory)
220 |                 if os.path.exists(origin):
221 |                     common_prefix = os.path.commonpath([origin, base_directory])
222 |                     import_item['path_exists'] = common_prefix == base_directory or origin.startswith(base_directory)
223 |                 else:
224 |                     import_item['path_exists'] = False
225 |             except Exception:
226 |                 import_item['path_exists'] = False
227 |         else:
228 |             import_item['path_exists'] = False
229 |     return imports
230 | 
231 | 
232 | def process_file(filename, base_directory=None):
233 |     if not base_directory:
234 |         base_directory = os.path.dirname(filename)
235 |     tree, code = parse_file(filename)
236 |     elements = get_elements(tree)
237 |     imports = analyze_imports(filename, base_directory, tree)
238 |     imports = check_path_exists(imports, base_directory)
239 |     return {
240 |         'filename': filename,
241 |         'elements': elements,
242 |         'imports': imports
243 |     }
244 | 
245 | 
246 | def should_process_directory(dir_path: str) -> bool:
247 |     """
248 |     Check if a directory should be processed or ignored
249 |     """
250 |     path_parts = dir_path.split(os.sep)
251 |     return not any(part in config.ignored_dirs for part in path_parts)
252 | 
253 | 
254 | if __name__ == "__main__":
255 |     import json
256 |     filename = "/Users/ankits/PycharmProjects/qpulse-backend/python_scripts/interactive_ai_agent/tools/get_test_scenario_tags.py"
257 |     base_directory = "/Users/ankits/PycharmProjects/qpulse-backend"
258 |     if os.path.exists(filename) and should_process_directory(filename) and filename.endswith(".py"):
259 |         result = process_file(filename, base_directory)
260 |         print(json.dumps(result, indent=2))
261 |     else:
262 |         print(f"File {filename} not found")
263 | 


--------------------------------------------------------------------------------
/golang_pipeline/generate_file_information.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import Dict, List, Optional
  3 | 
  4 | from tree_sitter import Language, Parser, QueryCursor
  5 | import tree_sitter_go
  6 | 
  7 | from config import Configurations
  8 | 
  9 | config = Configurations()
 10 | 
 11 | GO_LANGUAGE = Language(tree_sitter_go.language())
 12 | parser = Parser(GO_LANGUAGE)
 13 | _MODULE_NAME_CACHE: Dict[str, Optional[str]] = {}
 14 | 
 15 | 
 16 | def parse_file(filename: str):
 17 |     with open(filename, "r", encoding="utf-8") as f:
 18 |         code = f.read()
 19 |     tree = parser.parse(code.encode("utf-8"))
 20 |     return tree, code
 21 | 
 22 | 
 23 | def _node_text(source: str, node) -> str:
 24 |     return source[node.start_byte : node.end_byte]
 25 | 
 26 | 
 27 | def _strip_quotes(value: Optional[str]) -> str:
 28 |     if not value:
 29 |         return ""
 30 |     value = value.strip()
 31 |     if value.startswith(("`", '"')) and value.endswith(("`", '"')):
 32 |         return value[1:-1]
 33 |     return value
 34 | 
 35 | 
 36 | def _get_module_name(base_directory: str) -> Optional[str]:
 37 |     cached = _MODULE_NAME_CACHE.get(base_directory)
 38 |     if cached is not None:
 39 |         return cached
 40 |     module_name = None
 41 |     go_mod_path = os.path.join(base_directory, "go.mod")
 42 |     try:
 43 |         with open(go_mod_path, "r", encoding="utf-8") as f:
 44 |             for line in f:
 45 |                 line = line.strip()
 46 |                 if line.startswith("module "):
 47 |                     parts = line.split()
 48 |                     if len(parts) >= 2:
 49 |                         module_name = parts[1]
 50 |                     break
 51 |     except OSError:
 52 |         module_name = None
 53 |     _MODULE_NAME_CACHE[base_directory] = module_name
 54 |     return module_name
 55 | 
 56 | 
 57 | def _resolve_import_origin(import_path: str, base_directory: Optional[str]) -> Optional[str]:
 58 |     if not base_directory:
 59 |         return None
 60 |     normalized = _strip_quotes(import_path)
 61 |     if not normalized:
 62 |         return None
 63 |     segments = [segment for segment in normalized.split("/") if segment]
 64 |     candidate = os.path.join(base_directory, *segments)
 65 |     if os.path.isdir(candidate):
 66 |         return os.path.normpath(candidate)
 67 |     go_file = f"{candidate}.go"
 68 |     if os.path.exists(go_file):
 69 |         return os.path.normpath(go_file)
 70 |     module_name = _get_module_name(base_directory)
 71 |     if module_name and normalized.startswith(module_name):
 72 |         rel_path = normalized[len(module_name) :].lstrip("/")
 73 |         if rel_path:
 74 |             module_candidate = os.path.join(base_directory, rel_path)
 75 |             if os.path.isdir(module_candidate):
 76 |                 return os.path.normpath(module_candidate)
 77 |             module_go = f"{module_candidate}.go"
 78 |             if os.path.exists(module_go):
 79 |                 return os.path.normpath(module_go)
 80 |     return None
 81 | 
 82 | 
 83 | def _collect_functions(root, source: str, file_path: str) -> List[Dict]:
 84 |     functions: List[Dict] = []
 85 |     stack = [root]
 86 |     while stack:
 87 |         node = stack.pop()
 88 |         if node.type in {"function_declaration", "method_declaration"}:
 89 |             name_node = node.child_by_field_name("name")
 90 |             if not name_node:
 91 |                 stack.extend(list(node.children))
 92 |                 continue
 93 |             func_name = _node_text(source, name_node)
 94 |             receiver_node = node.child_by_field_name("receiver")
 95 |             receiver = _node_text(source, receiver_node).strip() if receiver_node else None
 96 |             functions.append(
 97 |                 {
 98 |                     "type": "function",
 99 |                     "name": func_name,
100 |                     "receiver": receiver,
101 |                     "start_line": node.start_point[0] + 1,
102 |                     "end_line": node.end_point[0] + 1,
103 |                     "file_path": file_path,
104 |                 }
105 |             )
106 |         stack.extend(list(node.children))
107 |     return functions
108 | 
109 | 
110 | def _collect_types(root, source: str, file_path: str) -> List[Dict]:
111 |     types: List[Dict] = []
112 |     stack = [root]
113 |     while stack:
114 |         node = stack.pop()
115 |         if node.type == "type_spec":
116 |             name_node = node.child_by_field_name("name")
117 |             type_node = node.child_by_field_name("type")
118 |             if not name_node or not type_node:
119 |                 stack.extend(list(node.children))
120 |                 continue
121 |             type_name = _node_text(source, name_node)
122 |             types.append(
123 |                 {
124 |                     "type": "type",
125 |                     "name": type_name,
126 |                     "start_line": node.start_point[0] + 1,
127 |                     "end_line": node.end_point[0] + 1,
128 |                     "type_kind": type_node.type,
129 |                     "file_path": file_path,
130 |                 }
131 |             )
132 |         stack.extend(list(node.children))
133 |     return types
134 | 
135 | 
136 | def _extract_call_name(function_node, source: str) -> Optional[str]:
137 |     if function_node is None:
138 |         return None
139 |     if function_node.type == "identifier":
140 |         return _node_text(source, function_node)
141 |     if function_node.type == "selector_expression":
142 |         field_node = function_node.child_by_field_name("field")
143 |         if field_node:
144 |             return _node_text(source, field_node)
145 |     return None
146 | 
147 | 
148 | def _collect_function_calls(root, source: str) -> List[Dict]:
149 |     calls: List[Dict] = []
150 |     stack = [root]
151 |     while stack:
152 |         node = stack.pop()
153 |         if node.type == "call_expression":
154 |             function_node = node.child_by_field_name("function")
155 |             call_name = _extract_call_name(function_node, source)
156 |             if call_name:
157 |                 calls.append(
158 |                     {
159 |                         "type": "function_call",
160 |                         "name": call_name,
161 |                         "full_name": _node_text(source, function_node),
162 |                         "start_line": node.start_point[0] + 1,
163 |                         "end_line": node.end_point[0] + 1,
164 |                     }
165 |                 )
166 |         stack.extend(list(node.children))
167 |     return calls
168 | 
169 | 
170 | def _collect_imports(root, source: str, base_directory: Optional[str]) -> List[Dict]:
171 |     imports: List[Dict] = []
172 |     stack = [root]
173 |     while stack:
174 |         node = stack.pop()
175 |         if node.type == "import_declaration":
176 |             for child in node.named_children:
177 |                 if child.type != "import_spec":
178 |                     continue
179 |                 path_node = child.child_by_field_name("path")
180 |                 if not path_node:
181 |                     continue
182 |                 raw_path = _node_text(source, path_node)
183 |                 path_value = _strip_quotes(raw_path)
184 |                 alias_node = child.child_by_field_name("name")
185 |                 alias = _node_text(source, alias_node) if alias_node else None
186 |                 imported_name = alias or (path_value.split("/")[-1] if path_value else None)
187 |                 origin = _resolve_import_origin(path_value, base_directory)
188 |                 imports.append(
189 |                     {
190 |                         "type": "import",
191 |                         "imported_name": imported_name,
192 |                         "alias": alias,
193 |                         "from_module": path_value,
194 |                         "origin": origin,
195 |                         "line": child.start_point[0] + 1,
196 |                         "path_exists": bool(origin and os.path.exists(origin)),
197 |                         "usage_lines": [],
198 |                     }
199 |                 )
200 |         stack.extend(list(node.children))
201 |     return imports
202 | 
203 | 
204 | def _annotate_import_usages(tree, source: str, imports: List[Dict]) -> None:
205 |     alias_map = {}
206 |     for item in imports:
207 |         alias_key = item.get("alias") or item.get("imported_name")
208 |         if alias_key and alias_key not in {"_", "."}:
209 |             alias_map[alias_key] = item
210 |     if not alias_map:
211 |         return
212 |     query = GO_LANGUAGE.query("(identifier) @ident")
213 |     cursor = QueryCursor(query)
214 |     captures = cursor.captures(tree.root_node)
215 |     for node in captures.get("ident", []):
216 |         ident = node.text.decode("utf-8")
217 |         import_entry = alias_map.get(ident)
218 |         if not import_entry:
219 |             continue
220 |         line = node.start_point[0] + 1
221 |         if line == import_entry["line"]:
222 |             continue
223 |         usage_lines = import_entry.setdefault("usage_lines", [])
224 |         if line not in usage_lines:
225 |             usage_lines.append(line)
226 | 
227 | 
228 | def _attach_call_ranges(functions: List[Dict], calls: List[Dict]) -> None:
229 |     functions_by_name: Dict[str, Dict] = {}
230 |     for func in functions:
231 |         functions_by_name.setdefault(func["name"], func)
232 |     for call in calls:
233 |         target = functions_by_name.get(call["name"])
234 |         if not target:
235 |             continue
236 |         call["function_start_line"] = target["start_line"]
237 |         call["function_end_line"] = target["end_line"]
238 | 
239 | 
240 | def get_elements(tree, source: str, base_directory: str) -> Dict:
241 |     elements: Dict = {
242 |         "functions": [],
243 |         "function_calls": [],
244 |         "types": [],
245 |     }
246 |     functions = _collect_functions(tree.root_node, source, "")
247 |     calls = _collect_function_calls(tree.root_node, source)
248 |     _attach_call_ranges(functions, calls)
249 |     elements["functions"] = functions
250 |     elements["function_calls"] = calls
251 |     elements["types"] = _collect_types(tree.root_node, source, "")
252 |     imports = _collect_imports(tree.root_node, source, base_directory)
253 |     _annotate_import_usages(tree, source, imports)
254 |     return elements, imports
255 | 
256 | 
257 | def process_file(filename: str, base_directory: Optional[str] = None) -> Dict:
258 |     if not base_directory:
259 |         base_directory = os.path.dirname(filename)
260 |     tree, source = parse_file(filename)
261 |     elements, imports = get_elements(tree, source, base_directory)
262 |     # Ensure file_path for functions is populated after parsing.
263 |     for func in elements.get("functions", []):
264 |         func["file_path"] = filename
265 |     for type_entry in elements.get("types", []):
266 |         type_entry["file_path"] = filename
267 |     return {"filename": filename, "elements": elements, "imports": imports}
268 | 


--------------------------------------------------------------------------------
/swagger_generator.py:
--------------------------------------------------------------------------------
  1 | from llm_client import OpenAiClient
  2 | from config import Configurations
  3 | import prompts
  4 | from concurrent.futures import ThreadPoolExecutor, as_completed
  5 | import json
  6 | import time
  7 | import os, re
  8 | import datetime
  9 | from utils import get_git_commit_hash, get_github_repo_url, get_repo_path, get_repo_name, format_repo_name
 10 | 
 11 | config = Configurations()
 12 | 
 13 | class SwaggerGeneration:
 14 |     def __init__(self):
 15 |         self.openai_client = OpenAiClient()
 16 | 
 17 | 
 18 |     def create_swagger_json(self, endpoints, authentication_information, framework, api_host):
 19 |         repo_path = get_repo_path()
 20 |         repo_name = get_repo_name()
 21 |         swagger = {
 22 |             "openapi": "3.0.0",
 23 |             "info": {
 24 |                 "title": repo_name,
 25 |                 "version": "1.0.0",
 26 |                 "description": "This Swagger file was generated using OpenAI GPT.",
 27 |                 "generated_at": datetime.datetime.utcnow().isoformat() + "Z",
 28 |                 "commit_reference": get_git_commit_hash(),
 29 |                 "github_repo_url": get_github_repo_url()
 30 |             },
 31 |             "servers": [
 32 |                 {
 33 |                     "url": api_host
 34 |                 }
 35 |             ],
 36 |             "paths": {}
 37 |         }
 38 |         print("\n***************************************************")
 39 |         print(f"\nstarted generating swagger for {len(endpoints)} endpoints")
 40 |         start_time = time.time()
 41 |         completed = 0
 42 | 
 43 |         def process_endpoint(endpoint):
 44 |             endpoint_swagger = self.generate_endpoint_swagger(endpoint, authentication_information, framework)
 45 |             return endpoint["path"], endpoint["method"].lower(), endpoint_swagger
 46 | 
 47 |         with ThreadPoolExecutor(max_workers=8) as executor:
 48 |             future_to_endpoint = {executor.submit(process_endpoint, endpoint): endpoint
 49 |                                   for endpoint in endpoints}
 50 | 
 51 |             for future in as_completed(future_to_endpoint):
 52 |                 path, method, endpoint_swagger = future.result()
 53 | 
 54 |                 if path not in swagger["paths"]:
 55 |                     swagger["paths"][path] = {}
 56 | 
 57 |                 key = list(endpoint_swagger['paths'].keys())[0]
 58 |                 _method_list = list(endpoint_swagger['paths'][key].keys())
 59 |                 if not _method_list:
 60 |                     continue
 61 |                 _method = _method_list[0]
 62 |                 swagger["paths"][path][_method] = endpoint_swagger['paths'][key][_method]
 63 | 
 64 |                 completed += 1
 65 |                 end_time = time.time()
 66 |                 print(f"completed generating swagger for {completed} endpoints in {int(end_time - start_time)} seconds",
 67 |                       end="\r")
 68 |         return swagger
 69 | 
 70 | 
 71 | 
 72 |     def generate_endpoint_swagger(self, endpoint, authentication_information, framework):
 73 |         if framework == "ruby_on_rails":
 74 |             prompt = prompts.ruby_on_rails_swagger_generation_prompt.format(endpoint_info = endpoint['info'], endpoint_method = endpoint['method'], endpoint_path = endpoint['path'],
 75 |                                                                             authentication_information = authentication_information)
 76 |         else:
 77 |             prompt = prompts.generic_swagger_generation_prompt.format(endpoint_info = endpoint['info'], endpoint_method = endpoint['method'], endpoint_path = endpoint['path'],
 78 |                                                                             authentication_information = authentication_information)
 79 |         messages = [
 80 |             {"role": "system", "content": prompts.swagger_generation_system_prompt},
 81 |             {"role": "user", "content": prompt}
 82 |         ]
 83 |         response_content = self.openai_client.call_chat_completion(messages=messages)
 84 |         try:
 85 |             start_index = response_content.find('{')
 86 |             end_index = response_content.rfind('}')
 87 |             swagger_json_block = response_content[start_index:end_index + 1]
 88 |             return json.loads(swagger_json_block)
 89 |         except Exception as ex:
 90 |             return {"paths": {endpoint['path']: {}}}
 91 | 
 92 | 
 93 |     @staticmethod
 94 |     def save_swagger_json(swagger, filename):
 95 |         """
 96 |         Saves the Swagger JSON to a file.
 97 | 
 98 |         Args:
 99 |             swagger (dict): The Swagger JSON dictionary.
100 |             filename (str): The output file name.
101 |         """
102 |         swagger = SwaggerGeneration._sanitize_swagger(swagger)
103 |         # Create directory if it doesn't exist
104 |         directory = os.path.dirname(filename)
105 |         if directory:
106 |             os.makedirs(directory, exist_ok=True)
107 |         with open(filename, 'w', encoding='utf-8') as file:
108 |             json.dump(swagger, file, indent=2)
109 |         # Display relative path (remove /workspace prefix if present)
110 |         display_path = filename
111 |         if filename.startswith('/workspace/'):
112 |             display_path = filename[len('/workspace/'):]
113 |             if not display_path.startswith('./'):
114 |                 display_path = './' + display_path
115 |         print(f"Swagger JSON saved to {display_path}.")
116 |         # Generate HTML viewer file in the same directory
117 |         SwaggerGeneration.generate_html_viewer(filename)
118 | 
119 |     @staticmethod
120 |     def generate_html_viewer(swagger_json_path):
121 |         """
122 |         Generates an HTML viewer file in the same directory as the swagger.json file.
123 |         Embeds the swagger.json data directly into the HTML to avoid CORS issues.
124 | 
125 |         Args:
126 |             swagger_json_path (str): Path to the swagger.json file.
127 |         """
128 |         try:
129 |             # Get the directory of the swagger.json file
130 |             swagger_dir = os.path.dirname(swagger_json_path)
131 |             if not swagger_dir:
132 |                 swagger_dir = '.'
133 |             
134 |             # Path to the HTML viewer template
135 |             html_template_path = os.path.join(os.path.dirname(__file__), 'apimesh-docs.html')
136 |             html_output_path = os.path.join(swagger_dir, 'apimesh-docs.html')
137 |             
138 |             # Read the swagger.json file
139 |             swagger_data = None
140 |             if os.path.exists(swagger_json_path):
141 |                 with open(swagger_json_path, 'r', encoding='utf-8') as f:
142 |                     swagger_data = json.load(f)
143 |             
144 |             # Read the HTML template
145 |             if os.path.exists(html_template_path):
146 |                 with open(html_template_path, 'r', encoding='utf-8') as f:
147 |                     html_content = f.read()
148 |                 
149 |                 # Replace <repo_name> placeholder with formatted repo name from utils
150 |                 repo_name = get_repo_name()
151 |                 formatted_repo_name = format_repo_name(repo_name)
152 |                 html_content = html_content.replace('<repo_name>', formatted_repo_name)
153 |                 
154 |                 # Embed the swagger data as a JavaScript variable
155 |                 if swagger_data:
156 |                     # Escape the JSON for embedding in JavaScript
157 |                     swagger_json_str = json.dumps(swagger_data, indent=2)
158 |                     swagger_json_str = re.sub(r'</(script)', r'<\/\1', swagger_json_str, flags=re.IGNORECASE)
159 |                     # Replace the placeholder or add the embedded data before the closing script tag
160 |                     # We'll add it right after the script tag opens
161 |                     embedded_data_script = f'''
162 |         // Embedded Swagger data (to avoid CORS issues)
163 |         const EMBEDDED_SWAGGER_DATA = {swagger_json_str};
164 | '''
165 |                     # Find the script tag and insert the embedded data right after it
166 |                     script_start = html_content.find('<script>')
167 |                     if script_start != -1:
168 |                         insert_pos = script_start + len('<script>')
169 |                         html_content = html_content[:insert_pos] + embedded_data_script + html_content[insert_pos:]
170 |                 
171 |                 # Write the modified HTML to the output directory
172 |                 with open(html_output_path, 'w', encoding='utf-8') as f:
173 |                     f.write(html_content)
174 |                 
175 |                 # Display relative path (remove /workspace prefix if present)
176 |                 display_path = html_output_path
177 |                 if html_output_path.startswith('/workspace/'):
178 |                     display_path = html_output_path[len('/workspace/'):]
179 |                     if not display_path.startswith('./'):
180 |                         display_path = './' + display_path
181 |                 
182 |                 # Print formatted success message
183 |                 print("\n==========================================")
184 |                 print("Swagger HTML Viewer Generated Successfully")
185 |                 print("==========================================\n")
186 |                 print("The HTML viewer has been generated at:")
187 |                 print(f"  Relative path:  {display_path} (in your mounted volume)\n")
188 |                 print("To view it:")
189 |                 print("  1. The file is in your mounted volume directory")
190 |                 print("  2. Open it directly in your browser from your host machine:")
191 |                 print("     Navigate to your repository directory and open:")
192 |                 print(f"     {display_path}\n")
193 |                 print("==========================================")
194 |                 
195 |                 return display_path
196 |             else:
197 |                 print(f"Warning: HTML template not found at {html_template_path}")
198 |                 return None
199 |         except Exception as ex:
200 |             print(f"Warning: Could not generate HTML viewer: {ex}")
201 |             return None
202 | 
203 |     @staticmethod
204 |     def _sanitize_swagger(swagger: dict) -> dict:
205 |         """
206 |         Apply lightweight, framework-agnostic cleanup:
207 |         - normalize Express-style segments (:param -> {param})
208 |         - merge duplicate paths created by differing param syntax
209 |         - drop wildcard /* or * paths that often come from generic middleware
210 |         """
211 |         paths = swagger.get("paths", {})
212 |         if not isinstance(paths, dict):
213 |             return swagger
214 | 
215 |         def normalize(path: str) -> str:
216 |             return re.sub(r":([A-Za-z_][\w-]*)", r"{\1}", path)
217 | 
218 |         # Drop wildcard paths
219 |         for wildcard in ("/*", "*"):
220 |             paths.pop(wildcard, None)
221 | 
222 |         # Re-key normalized paths
223 |         for original in list(paths.keys()):
224 |             normalized = normalize(original)
225 |             if normalized == original:
226 |                 continue
227 |             methods = paths.pop(original)
228 |             if normalized not in paths:
229 |                 paths[normalized] = methods
230 |             else:
231 |                 # merge methods, favor normalized version
232 |                 paths[normalized].update(methods)
233 | 
234 |         swagger["paths"] = paths
235 |         return swagger
236 | 


--------------------------------------------------------------------------------
/golang_pipeline/identify_api_functions.py:
--------------------------------------------------------------------------------
  1 | from dataclasses import dataclass
  2 | from pathlib import Path
  3 | from typing import Dict, List, Optional, Sequence
  4 | 
  5 | from tree_sitter import Language, Node, Parser
  6 | import tree_sitter_go
  7 | 
  8 | GO_LANGUAGE = Language(tree_sitter_go.language())
  9 | parser = Parser(GO_LANGUAGE)
 10 | 
 11 | 
 12 | HTTP_METHODS = {
 13 |     "GET",
 14 |     "POST",
 15 |     "PUT",
 16 |     "PATCH",
 17 |     "DELETE",
 18 |     "OPTIONS",
 19 |     "HEAD",
 20 | }
 21 | _CHAIN_TERMINATORS = {"methods"}
 22 | _ROUTER_FUNCTIONS = {"handlefunc", "handle"}
 23 | _GROUP_METHODS = {"group", "route", "pathprefix"}
 24 | 
 25 | 
 26 | @dataclass
 27 | class HandlerInfo:
 28 |     name: str
 29 |     file_path: Optional[str]
 30 |     start_line: Optional[int]
 31 |     end_line: Optional[int]
 32 |     handler_name: Optional[str]
 33 |     selector: Optional[str] = None
 34 | 
 35 | 
 36 | def _node_text(source: str, node: Node) -> str:
 37 |     return source[node.start_byte : node.end_byte]
 38 | 
 39 | 
 40 | def _strip_quotes(value: Optional[str]) -> str:
 41 |     if not value:
 42 |         return ""
 43 |     value = value.strip()
 44 |     if value.startswith(("`", '"')) and value.endswith(("`", '"')):
 45 |         return value[1:-1]
 46 |     return value
 47 | 
 48 | 
 49 | def _collect_function_definitions(
 50 |     root: Node, source: str, file_path: Path
 51 | ) -> Dict[str, Dict]:
 52 |     functions: Dict[str, Dict] = {}
 53 |     stack = [root]
 54 |     while stack:
 55 |         node = stack.pop()
 56 |         if node.type == "function_declaration":
 57 |             name_node = node.child_by_field_name("name")
 58 |             if name_node:
 59 |                 func_name = _node_text(source, name_node)
 60 |                 entry = {
 61 |                     "type": "function",
 62 |                     "name": func_name,
 63 |                     "start_line": node.start_point[0] + 1,
 64 |                     "end_line": node.end_point[0] + 1,
 65 |                     "file_path": str(file_path),
 66 |                 }
 67 |                 functions.setdefault(func_name, entry)
 68 |         stack.extend(list(node.children))
 69 |     return functions
 70 | 
 71 | 
 72 | def _iter_call_arguments(call_node: Node) -> Sequence[Node]:
 73 |     arguments_node = call_node.child_by_field_name("arguments")
 74 |     if not arguments_node:
 75 |         return ()
 76 |     return [
 77 |         child
 78 |         for child in arguments_node.children
 79 |         if child.type not in {"(", ")", ","}
 80 |     ]
 81 | 
 82 | 
 83 | def _is_string_literal(node: Node) -> bool:
 84 |     return node.type in {"interpreted_string_literal", "raw_string_literal"}
 85 | 
 86 | 
 87 | def _extract_path_argument(call_node: Node, source: str) -> Optional[str]:
 88 |     for arg in _iter_call_arguments(call_node):
 89 |         if _is_string_literal(arg):
 90 |             return _strip_quotes(_node_text(source, arg))
 91 |     return None
 92 | 
 93 | 
 94 | def _extract_methods_from_arguments(call_node: Node, source: str) -> List[str]:
 95 |     methods: List[str] = []
 96 |     for arg in _iter_call_arguments(call_node):
 97 |         if _is_string_literal(arg):
 98 |             method = _strip_quotes(_node_text(source, arg)).upper()
 99 |             if method:
100 |                 methods.append(method)
101 |     return methods
102 | 
103 | 
104 | def _is_selector_operand_of(node: Node, field_name: str, source: str) -> bool:
105 |     parent = node.parent
106 |     if not parent or parent.type != "selector_expression":
107 |         return False
108 |     field_node = parent.child_by_field_name("field")
109 |     if not field_node:
110 |         return False
111 |     if _node_text(source, field_node).lower() != field_name.lower():
112 |         return False
113 |     grandparent = parent.parent
114 |     return bool(grandparent and grandparent.type == "call_expression")
115 | 
116 | 
117 | def _normalize_http_method(name: str) -> Optional[str]:
118 |     if not name:
119 |         return None
120 |     normalized = name.upper()
121 |     if normalized in HTTP_METHODS:
122 |         return normalized
123 |     if name.lower() == "any":
124 |         return "ANY"
125 |     return None
126 | 
127 | 
128 | def _extract_handler_info(
129 |     handler_node: Optional[Node],
130 |     source: str,
131 |     file_path: Path,
132 |     functions_by_name: Dict[str, Dict],
133 | ) -> Optional[HandlerInfo]:
134 |     if handler_node is None:
135 |         return None
136 | 
137 |     node_type = handler_node.type
138 |     if node_type == "identifier":
139 |         handler_name = _node_text(source, handler_node)
140 |         definition = functions_by_name.get(handler_name)
141 |         return HandlerInfo(
142 |             name=handler_name,
143 |             handler_name=handler_name,
144 |             file_path=definition.get("file_path") if definition else None,
145 |             start_line=definition.get("start_line") if definition else None,
146 |             end_line=definition.get("end_line") if definition else None,
147 |         )
148 | 
149 |     if node_type == "selector_expression":
150 |         field_node = handler_node.child_by_field_name("field")
151 |         if not field_node:
152 |             return None
153 |         handler_name = _node_text(source, field_node)
154 |         definition = functions_by_name.get(handler_name)
155 |         return HandlerInfo(
156 |             name=_node_text(source, handler_node),
157 |             handler_name=handler_name,
158 |             file_path=definition.get("file_path") if definition else None,
159 |             start_line=definition.get("start_line") if definition else None,
160 |             end_line=definition.get("end_line") if definition else None,
161 |             selector=_node_text(source, handler_node),
162 |         )
163 | 
164 |     if node_type == "function_literal":
165 |         return HandlerInfo(
166 |             name=f"inline_handler@{handler_node.start_point[0] + 1}",
167 |             handler_name=None,
168 |             file_path=str(file_path),
169 |             start_line=handler_node.start_point[0] + 1,
170 |             end_line=handler_node.end_point[0] + 1,
171 |         )
172 |     return None
173 | 
174 | 
175 | def _extract_handler_node(call_node: Node) -> Optional[Node]:
176 |     args = list(_iter_call_arguments(call_node))
177 |     if len(args) < 2:
178 |         return args[1] if len(args) == 2 else None
179 |     # Frameworks like gin/echo accept multiple middleware + handler arguments.
180 |     for candidate in reversed(args):
181 |         if candidate.type in {"identifier", "selector_expression", "function_literal"}:
182 |             return candidate
183 |     return None
184 | 
185 | 
186 | def _join_paths(*segments: Optional[str]) -> str:
187 |     parts = [segment for segment in segments if segment]
188 |     if not parts:
189 |         return ""
190 |     path = ""
191 |     for segment in parts:
192 |         if not segment:
193 |             continue
194 |         if not path:
195 |             path = segment
196 |             continue
197 |         if not path.endswith("/") and not segment.startswith("/"):
198 |             path = f"{path}/{segment}"
199 |         elif path.endswith("/") and segment.startswith("/"):
200 |             path = f"{path}{segment.lstrip('/')}"
201 |         else:
202 |             path = f"{path}{segment}"
203 |     return path
204 | 
205 | 
206 | def _collect_path_prefix(node: Optional[Node], source: str) -> Optional[str]:
207 |     segments: List[str] = []
208 |     current = node
209 |     visited = set()
210 |     while current and current.type == "call_expression":
211 |         if current.id in visited:
212 |             break
213 |         visited.add(current.id)
214 |         func_node = current.child_by_field_name("function")
215 |         if not func_node or func_node.type != "selector_expression":
216 |             break
217 |         field_node = func_node.child_by_field_name("field")
218 |         method_lower = _node_text(source, field_node).lower() if field_node else ""
219 |         if method_lower in _GROUP_METHODS:
220 |             prefix = _extract_path_argument(current, source)
221 |             if prefix:
222 |                 segments.append(prefix)
223 |         operand = func_node.child_by_field_name("operand")
224 |         if not operand:
225 |             break
226 |         current = operand if operand.type == "call_expression" else None
227 |     if not segments:
228 |         return None
229 |     # segments collected from inner-most outward; reverse to maintain call order.
230 |     return "".join(
231 |         f"{segment if segment.startswith('/') else '/' + segment}"
232 |         for segment in reversed(segments)
233 |     )
234 | 
235 | 
236 | def _build_endpoint_entry(
237 |     path: str,
238 |     http_method: str,
239 |     handler_info: HandlerInfo,
240 |     route_file: Path,
241 | ) -> Dict:
242 |     entry = {
243 |         "type": "function",
244 |         "route": path,
245 |         "http_method": http_method,
246 |         "route_file": str(route_file),
247 |         "name": handler_info.name,
248 |         "handler_name": handler_info.handler_name or handler_info.name,
249 |         "handler_selector": handler_info.selector,
250 |         "file_path": handler_info.file_path,
251 |         "start_line": handler_info.start_line,
252 |         "end_line": handler_info.end_line,
253 |     }
254 |     return entry
255 | 
256 | 
257 | def _extract_routes_from_call(
258 |     call_node: Node,
259 |     source: str,
260 |     file_path: Path,
261 |     functions_by_name: Dict[str, Dict],
262 | ) -> List[Dict]:
263 |     func_node = call_node.child_by_field_name("function")
264 |     if not func_node:
265 |         return []
266 | 
267 |     if func_node.type == "selector_expression":
268 |         field_node = func_node.child_by_field_name("field")
269 |         if not field_node:
270 |             return []
271 |         method_name = _node_text(source, field_node)
272 |         method_lower = method_name.lower()
273 |         operand_node = func_node.child_by_field_name("operand")
274 | 
275 |         if method_lower in _CHAIN_TERMINATORS and operand_node and operand_node.type == "call_expression":
276 |             base_routes = _extract_routes_from_call(
277 |                 operand_node, source, file_path, functions_by_name
278 |             )
279 |             http_methods = _extract_methods_from_arguments(call_node, source)
280 |             if not http_methods:
281 |                 return base_routes
282 |             expanded: List[Dict] = []
283 |             for route in base_routes:
284 |                 for verb in http_methods:
285 |                     clone = route.copy()
286 |                     clone["http_method"] = verb
287 |                     expanded.append(clone)
288 |             return expanded
289 | 
290 |         normalized_method = _normalize_http_method(method_name)
291 |         if normalized_method:
292 |             path = _extract_path_argument(call_node, source)
293 |             if not path:
294 |                 return []
295 |             prefix = _collect_path_prefix(operand_node, source)
296 |             full_path = _join_paths(prefix, path) if prefix else path
297 |             handler_node = _extract_handler_node(call_node)
298 |             handler_info = _extract_handler_info(
299 |                 handler_node, source, file_path, functions_by_name
300 |             )
301 |             if not handler_info:
302 |                 return []
303 |             return [
304 |                 _build_endpoint_entry(full_path, normalized_method, handler_info, file_path)
305 |             ]
306 | 
307 |         if method_lower in _ROUTER_FUNCTIONS:
308 |             path = _extract_path_argument(call_node, source)
309 |             if not path:
310 |                 return []
311 |             prefix = _collect_path_prefix(operand_node, source)
312 |             full_path = _join_paths(prefix, path) if prefix else path
313 |             handler_node = _extract_handler_node(call_node)
314 |             handler_info = _extract_handler_info(
315 |                 handler_node, source, file_path, functions_by_name
316 |             )
317 |             if not handler_info:
318 |                 return []
319 |             return [
320 |                 _build_endpoint_entry(full_path, "GET", handler_info, file_path)
321 |             ]
322 | 
323 |     return []
324 | 
325 | 
326 | def _is_call_operand_of_methods(call_node: Node, source: str) -> bool:
327 |     return _is_selector_operand_of(call_node, "methods", source)
328 | 
329 | 
330 | def find_api_endpoints(file_path: Path, repo_root: str) -> List[Dict]:
331 |     try:
332 |         source = file_path.read_text(encoding="utf-8")
333 |     except OSError:
334 |         return []
335 | 
336 |     tree = parser.parse(source.encode("utf-8"))
337 |     functions_by_name = _collect_function_definitions(
338 |         tree.root_node, source, file_path
339 |     )
340 | 
341 |     endpoints: List[Dict] = []
342 |     stack = [tree.root_node]
343 |     while stack:
344 |         node = stack.pop()
345 |         if node.type == "call_expression":
346 |             if _is_call_operand_of_methods(node, source):
347 |                 stack.extend(list(node.children))
348 |                 continue
349 |             routes = _extract_routes_from_call(
350 |                 node, source, file_path, functions_by_name
351 |             )
352 |             if routes:
353 |                 endpoints.extend(routes)
354 |         stack.extend(list(node.children))
355 | 
356 |     return endpoints
357 | 


--------------------------------------------------------------------------------
/nodejs_pipeline/run_swagger_generation.py:
--------------------------------------------------------------------------------
  1 | import os, json, re
  2 | import shutil
  3 | import datetime
  4 | import time
  5 | from concurrent.futures import ThreadPoolExecutor, as_completed
  6 | from pathlib import Path
  7 | from nodejs_pipeline.generate_file_information import process_file
  8 | from nodejs_pipeline.find_api_definition_files import find_api_definition_files
  9 | from nodejs_pipeline.identify_api_functions import find_api_endpoints_js
 10 | from config import Configurations
 11 | from nodejs_pipeline.definition_swagger_generator import get_function_definition_swagger
 12 | from nodejs_pipeline.constants import (
 13 |     SUPPORTED_NODE_FILE_EXTENSIONS,
 14 |     METADATA_DIR_NAME,
 15 | )
 16 | from utils import get_git_commit_hash, get_github_repo_url, get_repo_path, get_repo_name
 17 | 
 18 | config = Configurations()
 19 | 
 20 | 
 21 | def _metadata_dir_path(directory_path: str) -> str:
 22 |     return os.path.join(directory_path, METADATA_DIR_NAME)
 23 | 
 24 | 
 25 | def _metadata_file_name(file_path: str) -> str:
 26 |     sanitized = str(file_path).replace("/", "_q_").replace("\\", "_q_")
 27 |     name, _ = os.path.splitext(sanitized)
 28 |     return f"{name}.json"
 29 | 
 30 | 
 31 | def should_process_directory(dir_path: str) -> bool:
 32 |     """
 33 |     Check if a directory should be processed or ignored
 34 |     """
 35 |     path_parts = dir_path.split(os.sep)
 36 |     return not any(part in config.ignored_dirs for part in path_parts)
 37 | 
 38 | def _normalize_route(route: str):
 39 |     if not route:
 40 |         return route
 41 |     # Convert Express-style :param to OpenAPI {param}
 42 |     return re.sub(r":([A-Za-z_][\w-]*)", r"{\1}", route)
 43 | 
 44 | 
 45 | def _extract_brace_block(lines, start_idx: int):
 46 |     brace_depth = 0
 47 |     collected = []
 48 |     started = False
 49 |     for line in lines[start_idx:]:
 50 |         collected.append(line)
 51 |         brace_depth += line.count("{") - line.count("}")
 52 |         if "{" in line:
 53 |             started = True
 54 |         if started and brace_depth <= 0:
 55 |             break
 56 |     return collected
 57 | 
 58 | 
 59 | def _find_use_block(lines, pattern: str):
 60 |     matcher = re.compile(pattern)
 61 |     for idx, line in enumerate(lines):
 62 |         if matcher.search(line):
 63 |             return _extract_brace_block(lines, idx)
 64 |     return None
 65 | 
 66 | 
 67 | def run_swagger_generation(host):
 68 |     directory_path = get_repo_path()
 69 |     repo_name = get_repo_name()
 70 |     new_dir_path = _metadata_dir_path(directory_path)
 71 |     os.makedirs(new_dir_path, exist_ok=True)
 72 |     try:
 73 |         for root, dirs, files in os.walk(directory_path):
 74 |             for file in files:
 75 |                 file_path = os.path.join(root, file)
 76 |                 suffix = Path(file_path).suffix.lower()
 77 |                 if (
 78 |                     os.path.exists(file_path)
 79 |                     and should_process_directory(str(file_path))
 80 |                     and suffix in SUPPORTED_NODE_FILE_EXTENSIONS
 81 |                 ):
 82 |                     try:
 83 |                         file_info = process_file(file_path, directory_path)
 84 |                     except Exception as ex:
 85 |                         continue
 86 |                     json_file_name = os.path.join(new_dir_path, _metadata_file_name(file_path))
 87 |                     with open(json_file_name, "w") as f:
 88 |                         json.dump(file_info, f, indent=4)
 89 |         api_definition_files = find_api_definition_files(directory_path)
 90 |         all_endpoints_dict = dict()
 91 |         for file in api_definition_files:
 92 |             all_endpoints = []
 93 |             py_file = Path(file)
 94 |             eps = find_api_endpoints_js(py_file)
 95 |             if eps:
 96 |                 all_endpoints.extend(eps)
 97 |                 all_endpoints_dict[file] = all_endpoints
 98 |         swagger = {
 99 |                 "openapi": "3.0.0",
100 |                 "info": {
101 |                     "title": repo_name,
102 |                     "version": "1.0.0",
103 |                     "description": "This Swagger file was generated using OpenAI GPT.",
104 |                     "generated_at": datetime.datetime.utcnow().isoformat() + "Z",
105 |                     "commit_reference": get_git_commit_hash(),
106 |                     "github_repo_url": get_github_repo_url()
107 |                 },
108 |                 "servers": [
109 |                     {
110 |                         "url": host
111 |                     }
112 |                 ],
113 |                 "paths": {}
114 |             }
115 |         endpoint_jobs = []
116 |         for value in all_endpoints_dict.values():
117 |             for item in value:
118 |                 if item.get('type') == 'class':
119 |                     endpoint_jobs.extend(item.get('methods', []))
120 |                 else:
121 |                     endpoint_jobs.append(item)
122 |         # Normalize paths once to avoid duplicates like /:name vs /{name}
123 |         for job in endpoint_jobs:
124 |             if 'route' in job:
125 |                 job['route'] = _normalize_route(job['route'])
126 |         if not endpoint_jobs:
127 |             return swagger
128 | 
129 |         def _generate_swagger_fragment(method_info):
130 |             context_code_blocks, method_definition_code_block = provide_context_codeblock(directory_path, method_info)
131 |             return get_function_definition_swagger(method_definition_code_block, context_code_blocks, method_info['route'])
132 | 
133 |         max_workers = min(5, len(endpoint_jobs))
134 |         start_time = time.time()
135 |         completed = 0
136 |         latest_message = ""
137 |         with ThreadPoolExecutor(max_workers=max_workers or 1) as executor:
138 |             futures = [executor.submit(_generate_swagger_fragment, method) for method in endpoint_jobs]
139 |             for future in as_completed(futures):
140 |                 swagger_for_def = future.result()
141 |                 _merge_paths(swagger, swagger_for_def)
142 |                 completed += 1
143 |                 latest_message = (
144 |                     f"Completed generating endpoint related information for {completed} endpoints in "
145 |                     f"{int(time.time() - start_time)} seconds"
146 |                 )
147 |                 print(latest_message, end="\r", flush=True)
148 |         if completed:
149 |             print(latest_message)
150 |         _post_process_swagger(swagger)
151 |         return swagger
152 |     finally:
153 |         if os.path.exists(new_dir_path):
154 |             shutil.rmtree(new_dir_path, ignore_errors=True)
155 | 
156 | 
157 | def get_dependencies(data, start_line, end_line, file_path):
158 |     elements = data.get('elements', {})
159 |     functions = elements.get('functions', [])
160 |     existing_function_names = [item['name'] for item in functions if item['name'] not in ['get', 'post', 'put', 'delete', 'patch']]
161 |     function_lookup = {}
162 |     for func in functions:
163 |         function_lookup.setdefault(func['name'], []).append(func)
164 |     in_file_dependency_functions = []
165 |     for item in elements.get('function_calls', []):
166 |         if (item['name'] in existing_function_names) and item['start_line'] >= start_line and item['end_line'] <= end_line:
167 |             call_line = item.get('start_line')
168 |             definition = None
169 |             candidates = function_lookup.get(item['name'], [])
170 |             if candidates:
171 |                 candidates = sorted(candidates, key=lambda func: func.get('start_line', 0))
172 |                 for candidate in candidates:
173 |                     start = candidate.get('start_line')
174 |                     end = candidate.get('end_line')
175 |                     if start and end and start <= call_line <= end:
176 |                         definition = candidate
177 |                         break
178 |                     if start and start <= call_line:
179 |                         definition = candidate
180 |                 if not definition:
181 |                     definition = candidates[0]
182 |             dependency_info = {
183 |                 'name': item['name'],
184 |                 'file_path': file_path,
185 |                 'call_start_line': item.get('start_line'),
186 |                 'call_end_line': item.get('end_line'),
187 |                 'function_start_line': None,
188 |                 'function_end_line': None
189 |             }
190 |             if definition:
191 |                 dependency_info['function_start_line'] = definition.get('start_line')
192 |                 dependency_info['function_end_line'] = definition.get('end_line')
193 |             else:
194 |                 dependency_info['function_start_line'] = item.get('start_line')
195 |                 dependency_info['function_end_line'] = item.get('end_line')
196 |             in_file_dependency_functions.append(dependency_info)
197 |     imported_functions = []
198 |     for item in elements.get('imports', []):
199 |         if not item['path_exists']:
200 |             continue
201 |         for k in item['usage_lines']:
202 |             if start_line<=k<=end_line:
203 |                 imported_functions.append(item)
204 |             if in_file_dependency_functions:
205 |                 for item1 in in_file_dependency_functions:
206 |                     dep_start = item1.get('call_start_line')
207 |                     dep_end = item1.get('call_end_line')
208 |                     if dep_start and dep_end and dep_start <= k <= dep_end and item not in imported_functions:
209 |                         imported_functions.append(item)
210 |     return in_file_dependency_functions, imported_functions
211 | 
212 | def get_code_blocks(in_file_dependency_functions, imported_functions, file_name, directory_path):
213 |     code_blocks = []
214 |     for block in in_file_dependency_functions:
215 |         block_file_name = block.get('file_path', file_name)
216 |         start = block.get('function_start_line')
217 |         end = block.get('function_end_line', start)
218 |         if not block_file_name or not start or not end:
219 |             continue
220 |         with open(block_file_name, "r") as f:
221 |             lines = f.readlines()
222 |         code_blocks.append(lines[start - 1: end])
223 |     for func in imported_functions:
224 |         visited = False
225 |         origin_file_name = func.get('origin')
226 |         if not origin_file_name:
227 |             continue
228 |         json_dir_path = _metadata_dir_path(directory_path)
229 |         complete_json_file_path = os.path.join(json_dir_path, _metadata_file_name(origin_file_name))
230 |         if not os.path.exists(complete_json_file_path):
231 |             continue
232 |         with open(complete_json_file_path, "r") as f:
233 |             data = json.load(f)
234 |         for item in data['elements']['classes']:
235 |             if item['name'] == func['imported_name']:
236 |                 visited = True
237 |                 with open(origin_file_name, "r") as f:
238 |                     lines = f.readlines()
239 |                 code_blocks.append(lines[item['start_line']-1: item['end_line']])
240 |                 break
241 |         if not visited:
242 |             for item in data['elements']['functions']:
243 |                 if item['name'] == func['imported_name']:
244 |                     visited = True
245 |                     with open(origin_file_name, "r") as f:
246 |                         lines = f.readlines()
247 |                     code_blocks.append(lines[item['start_line'] - 1: item['end_line']])
248 |                     break
249 |         if not visited:
250 |             for item in data['elements']['variables']:
251 |                 if item['name'] == func['imported_name']:
252 |                     with open(origin_file_name, "r") as f:
253 |                         lines = f.readlines()
254 |                     code_blocks.append(lines[item['start_line'] - 1: item['end_line']])
255 |                     break
256 |     return code_blocks
257 | 
258 | 
259 | def provide_context_codeblock(directory_path, method_info):
260 |     file_name = method_info['file_path']
261 |     with open(method_info['file_path'], "r") as f:
262 |         lines = f.readlines()
263 |     method_definition_code_block = lines[method_info["start_line"]-1: method_info["end_line"]]
264 |     json_dir_path = _metadata_dir_path(directory_path)
265 |     json_file = _metadata_file_name(file_name)
266 |     complete_json_file_path = os.path.join(json_dir_path, json_file)
267 | 
268 |     if not os.path.exists(complete_json_file_path):
269 |         return [], method_definition_code_block
270 | 
271 |     with open(complete_json_file_path, "r") as f:
272 |         data = json.load(f)
273 | 
274 |     in_file_dependency_functions, imported_functions = get_dependencies(
275 |         data,
276 |         method_info["start_line"],
277 |         method_info["end_line"],
278 |         method_info['file_path']
279 |     )
280 |     context_code_blocks = get_code_blocks(
281 |         in_file_dependency_functions,
282 |         imported_functions,
283 |         file_name,
284 |         directory_path
285 |     )
286 |     # Include catch-all responder middleware (e.g., app.use('/:name', ...)) to give the LLM response semantics.
287 |     responder_block = _find_use_block(lines, pattern=r"\.use\s*\(\s*['\"]/:")
288 |     if responder_block:
289 |         context_code_blocks.append(responder_block)
290 |     return context_code_blocks, method_definition_code_block
291 | 
292 | 
293 | def _merge_paths(target, source):
294 |     paths = source.get("paths", {})
295 |     for path_key, methods in paths.items():
296 |         target.setdefault("paths", {})
297 |         normalized_path = _normalize_route(path_key)
298 |         target["paths"].setdefault(normalized_path, {})
299 |         for method, payload in methods.items():
300 |             target["paths"][normalized_path][method] = payload
301 | 
302 | 
303 | def _post_process_swagger(swagger):
304 |     """
305 |     Clean up generated swagger to better align with tinyhttp behavior:
306 |     - drop wildcard /* CORS path
307 |     - normalize any lingering :param segments
308 |     - adjust /{name} POST to return 201 and optional body
309 |     - remove spurious 400 from GET /{name}
310 |     - allow string|array for _dependent in DELETE /{name}/{id}
311 |     """
312 |     paths = swagger.get("paths", {})
313 |     # drop wildcard CORS catch-all if present
314 |     paths.pop("/*", None)
315 |     paths.pop("*", None)
316 | 
317 |     # Re-key any lingering express-style paths
318 |     for original in list(paths.keys()):
319 |         normalized = _normalize_route(original)
320 |         if normalized != original:
321 |             existing = paths.pop(original)
322 |             if normalized not in paths:
323 |                 paths[normalized] = existing
324 |             else:
325 |                 paths[normalized].update(existing)
326 | 
327 |     # Fix POST /{name}
328 |     post_name = paths.get("/{name}", {}).get("post")
329 |     if post_name:
330 |         # body optional
331 |         if "requestBody" in post_name:
332 |             post_name["requestBody"]["required"] = False
333 |         # prefer 201, reuse existing schema if available
334 |         responses = post_name.setdefault("responses", {})
335 |         schema = None
336 |         for code in ("201", "200"):
337 |             resp = responses.get(code)
338 |             if not resp:
339 |                 continue
340 |             content = resp.get("content", {})
341 |             app_json = content.get("application/json", {})
342 |             schema = app_json.get("schema")
343 |             if schema:
344 |                 break
345 |         if not schema:
346 |             schema = {
347 |                 "type": "object",
348 |                 "properties": {"id": {"type": "string"}},
349 |                 "additionalProperties": True,
350 |             }
351 |         responses.clear()
352 |         responses["201"] = {
353 |             "description": "Resource created successfully.",
354 |             "content": {
355 |                 "application/json": {
356 |                     "schema": schema
357 |                 }
358 |             }
359 |         }
360 |         responses["404"] = {"description": "Collection not found."}
361 | 
362 |     # Clean GET /{name} errors
363 |     get_name = paths.get("/{name}", {}).get("get")
364 |     if get_name:
365 |         get_responses = get_name.get("responses", {})
366 |         get_responses.pop("400", None)
367 | 
368 |     # Fix _dependent param schema on DELETE /{name}/{id}
369 |     delete_item = paths.get("/{name}/{id}", {}).get("delete")
370 |     if delete_item:
371 |         for param in delete_item.get("parameters", []):
372 |             if param.get("name") == "_dependent":
373 |                 param["schema"] = {
374 |                     "oneOf": [
375 |                         {"type": "string"},
376 |                         {"type": "array", "items": {"type": "string"}},
377 |                     ]
378 |                 }
379 |                 break
380 | 


--------------------------------------------------------------------------------
/nodejs_pipeline/identify_api_functions.py:
--------------------------------------------------------------------------------
  1 | from pathlib import Path
  2 | import esprima
  3 | import json
  4 | import re
  5 | from tree_sitter import Language, Parser
  6 | import tree_sitter_typescript
  7 | from nodejs_pipeline.constants import (
  8 |     TYPESCRIPT_FILE_EXTENSIONS,
  9 |     TSX_FILE_EXTENSIONS,
 10 | )
 11 | 
 12 | 
 13 | API_METHODS = {"get", "post", "put", "delete", "patch", "options", "head", "all"}
 14 | ROUTE_OBJECT_KEYWORDS = {"app", "router", "route", "api", "controller", "server"}
 15 | ROUTE_OBJECT_SUFFIXES = ("router", "routes", "route", "app", "server", "controller", "api")
 16 | OPTIONAL_CATCH_PATTERN = re.compile(r'catch\s*(\{)')
 17 | FALLBACK_ENDPOINT_PATTERN = re.compile(
 18 |     r'(?P<object>[A-Za-z_$][\w$]*)\s*\.\s*(?P<method>GET|POST|PUT|DELETE|PATCH|OPTIONS|HEAD|ALL)\s*\(\s*(?P<route>["\'].*?["\'])?',
 19 |     re.IGNORECASE | re.DOTALL
 20 | )
 21 | 
 22 | TS_LANGUAGE = Language(tree_sitter_typescript.language_typescript())
 23 | TSX_LANGUAGE = Language(tree_sitter_typescript.language_tsx())
 24 | 
 25 | 
 26 | def _parse_with_optional_catch_fallback(source, *, loc=True):
 27 |     """
 28 |     Attempt to parse JavaScript source. If the parser fails because of
 29 |     optional catch binding syntax (catch { ... }), rewrite those blocks
 30 |     to catch (__apimesh_err) { ... } and retry once.
 31 |     """
 32 |     try:
 33 |         return esprima.parseModule(source, loc=loc)
 34 |     except Exception as first_error:
 35 |         patched_source, replaced = OPTIONAL_CATCH_PATTERN.subn('catch (__apimesh_err) {', source)
 36 |         if not replaced:
 37 |             raise first_error
 38 |         try:
 39 |             return esprima.parseModule(patched_source, loc=loc)
 40 |         except Exception:
 41 |             raise first_error
 42 | 
 43 | 
 44 | def _extract_endpoints_with_regex(source: str, file_path: Path):
 45 |     """Fallback endpoint detector when esprima cannot parse the file."""
 46 |     endpoints = []
 47 |     for match in FALLBACK_ENDPOINT_PATTERN.finditer(source):
 48 |         method = match.group('method').upper()
 49 |         route_literal = match.group('route')
 50 |         route = None
 51 |         if route_literal and len(route_literal) >= 2:
 52 |             route = route_literal[1:-1]
 53 |         start = match.start()
 54 |         end = match.end()
 55 |         start_line = source.count('\n', 0, start) + 1
 56 |         end_line = source.count('\n', 0, end) + 1
 57 |         obj = match.group('object') or ""
 58 |         low = obj.lower()
 59 |         if not (low in ROUTE_OBJECT_KEYWORDS or any(low.endswith(suf) for suf in ROUTE_OBJECT_SUFFIXES) or low.startswith(('app', 'api'))):
 60 |             continue
 61 |         endpoints.append({
 62 |             "type": "function",
 63 |             "method": method,
 64 |             "route": route,
 65 |             "start_line": start_line,
 66 |             "end_line": end_line,
 67 |             "file_path": str(file_path)
 68 |         })
 69 |     return endpoints
 70 | 
 71 | 
 72 | def find_api_endpoints_js(file_path: Path):
 73 |     try:
 74 |         source = file_path.read_text(encoding='utf-8')
 75 |     except Exception:
 76 |         return []
 77 | 
 78 |     suffix = file_path.suffix.lower()
 79 |     if suffix in TYPESCRIPT_FILE_EXTENSIONS or suffix in TSX_FILE_EXTENSIONS:
 80 |         return _find_api_endpoints_ts(file_path, source)
 81 | 
 82 |     return _find_api_endpoints_js(file_path, source)
 83 | 
 84 | 
 85 | def _find_api_endpoints_js(file_path: Path, source: str):
 86 |     try:
 87 |         tree = _parse_with_optional_catch_fallback(source, loc=True)
 88 |     except Exception as e:
 89 |         return _extract_endpoints_with_regex(source, file_path)
 90 | 
 91 |     endpoints = []
 92 | 
 93 |     def extract_call_expression(node, parent_obj=None):
 94 |         """Extract API endpoints from CallExpressions like app.get('/users', handler)"""
 95 |         if node.type == "CallExpression":
 96 |             callee = node.callee
 97 | 
 98 |             # Handle app.get(...) or router.post(...)
 99 |             if callee.type == "MemberExpression" and callee.property.type == "Identifier":
100 |                 method_name = callee.property.name.lower()
101 | 
102 |                 if method_name in API_METHODS and node.arguments:
103 |                     # Check first argument (the route string)
104 |                     first_arg = node.arguments[0]
105 |                     if first_arg.type == "Literal" and isinstance(first_arg.value, str):
106 |                         route = first_arg.value
107 |                     else:
108 |                         route = None
109 | 
110 |                     endpoints.append({
111 |                         "type": "function",
112 |                         "method": method_name.upper(),
113 |                         "route": route,
114 |                         "start_line": node.loc.start.line,
115 |                         "end_line": node.loc.end.line,
116 |                         "file_path": str(file_path)
117 |                     })
118 | 
119 |         # Recurse into child nodes
120 |         for child_name, child in node.__dict__.items():
121 |             if isinstance(child, list):
122 |                 for c in child:
123 |                     if hasattr(c, 'type'):
124 |                         extract_call_expression(c, node)
125 |             elif hasattr(child, 'type'):
126 |                 extract_call_expression(child, node)
127 | 
128 |     extract_call_expression(tree)
129 | 
130 |     return endpoints
131 | 
132 | 
133 | def _walk_tree(root):
134 |     stack = [root]
135 |     while stack:
136 |         node = stack.pop()
137 |         yield node
138 |         # named_children keeps noise nodes out of traversal
139 |         stack.extend(reversed(getattr(node, "named_children", [])))
140 | 
141 | 
142 | def _node_text(node, source_bytes):
143 |     return source_bytes[node.start_byte:node.end_byte].decode('utf-8')
144 | 
145 | 
146 | def _clean_literal(value: str):
147 |     if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
148 |         return value[1:-1]
149 |     return value
150 | 
151 | 
152 | def _clean_template_literal(value: str):
153 |     if "${" in value:
154 |         return None
155 |     if value.startswith("`") and value.endswith("`"):
156 |         return value[1:-1]
157 |     return value
158 | 
159 | def _find_matching_brace(source: str, start_idx: int):
160 |     """Find the index of the matching closing brace for source[start_idx] == '{'."""
161 |     depth = 0
162 |     for idx in range(start_idx, len(source)):
163 |         ch = source[idx]
164 |         if ch == "{":
165 |             depth += 1
166 |         elif ch == "}":
167 |             depth -= 1
168 |             if depth == 0:
169 |                 return idx
170 |     return -1
171 | 
172 | def _clean_path_literal(value: str | None):
173 |     if value is None:
174 |         return None
175 |     value = value.strip()
176 |     if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"', "`"}:
177 |         return value[1:-1]
178 |     return value
179 | 
180 | def _collect_decorators(node):
181 |     """Return decorator nodes attached to a class/method or its decorated wrapper."""
182 |     decorators = []
183 |     def _gather(target):
184 |         if not target:
185 |             return
186 |         for child in getattr(target, "children", []):
187 |             if child.type == "decorator":
188 |                 decorators.append(child)
189 |     _gather(node)
190 |     parent = getattr(node, "parent", None)
191 |     if parent and getattr(parent, "type", "") == "decorated_definition":
192 |         _gather(parent)
193 |     return decorators
194 | 
195 | def _parse_decorator(decorator_node, source_bytes):
196 |     """
197 |     Extract decorator identifier and first string/template argument if present.
198 |     Returns (name, arg_value).
199 |     """
200 |     expr = decorator_node.child_by_field_name("expression") or (decorator_node.children[0] if decorator_node.children else None)
201 |     if not expr:
202 |         return None, None
203 |     if expr.type == "call_expression":
204 |         func_node = expr.child_by_field_name("function")
205 |         name = _node_text(func_node, source_bytes) if func_node else None
206 |         args_node = expr.child_by_field_name("arguments")
207 |         arg_val = None
208 |         if args_node:
209 |             for arg in args_node.named_children:
210 |                 if arg.type == "string":
211 |                     arg_val = _clean_literal(_node_text(arg, source_bytes))
212 |                     break
213 |                 if arg.type == "template_string":
214 |                     arg_val = _clean_template_literal(_node_text(arg, source_bytes))
215 |                     break
216 |         return name, arg_val
217 |     if expr.type in {"identifier", "property_identifier"}:
218 |         return _node_text(expr, source_bytes), None
219 |     return None, None
220 | 
221 | def _combine_paths(prefix, path):
222 |     """Combine controller prefix and handler path into a single NestJS route."""
223 |     prefix_part = prefix or "/"
224 |     path_part = path if path is not None else "/"
225 |     if not prefix_part.startswith("/"):
226 |         prefix_part = "/" + prefix_part
227 |     if not path_part.startswith("/"):
228 |         path_part = "/" + path_part
229 |     combined = re.sub(r"//+", "/", prefix_part.rstrip("/") + path_part)
230 |     return combined if combined.startswith("/") else "/" + combined
231 | 
232 | 
233 | def _looks_like_route_object(name: str) -> bool:
234 |     low = name.lower()
235 |     return low in ROUTE_OBJECT_KEYWORDS or any(low.endswith(suf) for suf in ROUTE_OBJECT_SUFFIXES) or low.startswith(("app", "api"))
236 | 
237 | 
238 | def _select_ts_language(file_path: Path):
239 |     suffix = file_path.suffix.lower()
240 |     if suffix in TSX_FILE_EXTENSIONS and TSX_LANGUAGE:
241 |         return TSX_LANGUAGE
242 |     if suffix in TYPESCRIPT_FILE_EXTENSIONS and TS_LANGUAGE:
243 |         return TS_LANGUAGE
244 |     return None
245 | 
246 | 
247 | def _find_api_endpoints_ts(file_path: Path, source: str):
248 |     language = _select_ts_language(file_path)
249 |     if not language:
250 |         return _extract_endpoints_with_regex(source, file_path)
251 | 
252 |     parser = Parser(language)
253 |     try:
254 |         tree = parser.parse(source.encode('utf-8'))
255 |     except Exception:
256 |         return _extract_endpoints_with_regex(source, file_path)
257 | 
258 |     endpoints = []
259 |     source_bytes = source.encode('utf-8')
260 |     seen = set()
261 |     for endpoint in _extract_nest_endpoints(tree, source_bytes, file_path):
262 |         key = (endpoint["method"], endpoint.get("route"), endpoint.get("start_line"))
263 |         if key not in seen:
264 |             endpoints.append(endpoint)
265 |             seen.add(key)
266 | 
267 |     for node in _walk_tree(tree.root_node):
268 |         if node.type != "call_expression":
269 |             continue
270 |         endpoint = _extract_endpoint_from_ts_call(node, source_bytes, file_path)
271 |         if endpoint:
272 |             key = (endpoint["method"], endpoint.get("route"), endpoint.get("start_line"))
273 |             if key not in seen:
274 |                 endpoints.append(endpoint)
275 |                 seen.add(key)
276 | 
277 |     if not endpoints:
278 |         for endpoint in _extract_nest_endpoints_regex(source, file_path):
279 |             key = (endpoint["method"], endpoint.get("route"), endpoint.get("start_line"))
280 |             if key not in seen:
281 |                 endpoints.append(endpoint)
282 |                 seen.add(key)
283 |     return endpoints
284 | 
285 | 
286 | def _extract_endpoint_from_ts_call(node, source_bytes, file_path: Path):
287 |     func_node = node.child_by_field_name("function")
288 |     if not func_node or func_node.type != "member_expression":
289 |         return None
290 |     property_node = func_node.child_by_field_name("property")
291 |     object_node = func_node.child_by_field_name("object")
292 |     if not property_node or property_node.type not in {"property_identifier", "identifier"}:
293 |         return None
294 |     method_name = _node_text(property_node, source_bytes).strip().lower()
295 |     if method_name not in API_METHODS:
296 |         return None
297 |     route_object_name = _node_text(object_node, source_bytes) if object_node else ""
298 |     if not _looks_like_route_object(route_object_name):
299 |         return None
300 | 
301 |     route = None
302 |     arguments_node = node.child_by_field_name("arguments")
303 |     if arguments_node:
304 |         for child in arguments_node.named_children:
305 |             if child.type == "string":
306 |                 route = _clean_literal(_node_text(child, source_bytes))
307 |                 break
308 |             if child.type == "template_string":
309 |                 route = _clean_template_literal(_node_text(child, source_bytes))
310 |                 break
311 | 
312 |     return {
313 |         "type": "function",
314 |         "method": method_name.upper(),
315 |         "route": route,
316 |         "start_line": node.start_point[0] + 1,
317 |         "end_line": node.end_point[0] + 1,
318 |         "file_path": str(file_path),
319 |     }
320 | 
321 | def _extract_nest_endpoints(tree, source_bytes, file_path: Path):
322 |     """Extract NestJS controller + method decorator routes."""
323 |     endpoints = []
324 |     for node in _walk_tree(tree.root_node):
325 |         if node.type != "class_declaration":
326 |             continue
327 |         controller_prefix = None
328 |         for decorator in _collect_decorators(node):
329 |             name, arg = _parse_decorator(decorator, source_bytes)
330 |             if not name:
331 |                 continue
332 |             if name.lower() == "controller":
333 |                 controller_prefix = arg or "/"
334 |                 break
335 |         if controller_prefix is None:
336 |             continue
337 | 
338 |         for child in node.named_children:
339 |             if child.type not in {"method_definition", "public_field_definition"}:
340 |                 continue
341 |             method_http = None
342 |             method_path = None
343 |             for decorator in _collect_decorators(child):
344 |                 name, arg = _parse_decorator(decorator, source_bytes)
345 |                 if not name:
346 |                     continue
347 |                 low = name.lower()
348 |                 if low in API_METHODS:
349 |                     method_http = low.upper()
350 |                     method_path = arg if arg is not None else "/"
351 |                     break
352 |             if not method_http:
353 |                 continue
354 |             start_line = child.start_point[0] + 1 if hasattr(child, "start_point") else None
355 |             end_line = child.end_point[0] + 1 if hasattr(child, "end_point") else None
356 |             route = _combine_paths(controller_prefix, method_path)
357 |             endpoints.append({
358 |                 "type": "function",
359 |                 "method": method_http,
360 |                 "route": route,
361 |                 "start_line": start_line,
362 |                 "end_line": end_line,
363 |                 "file_path": str(file_path)
364 |             })
365 |     return endpoints
366 | 
367 | def _extract_nest_endpoints_regex(source: str, file_path: Path):
368 |     """
369 |     Fallback extractor for NestJS controllers using regex when tree-sitter parsing misses decorators.
370 |     This is intentionally permissive to avoid empty output on complex TS syntax.
371 |     """
372 |     endpoints = []
373 |     controller_re = re.compile(r'@Controller\s*\(\s*(?P<arg>(`[^`]*`|"[^"]*"|\'[^\']*\'|[^)]*)?)\s*\)', re.MULTILINE)
374 |     class_re = re.compile(r'class\s+[A-Za-z_]\w*\s*[^{]*\{', re.MULTILINE)
375 |     method_re = re.compile(r'@(Get|Post|Put|Delete|Patch|Options|Head|All)\s*\(\s*(?P<arg>(`[^`]*`|"[^"]*"|\'[^\']*\'|[^)]*)?)\s*\)', re.IGNORECASE)
376 | 
377 |     for controller_match in controller_re.finditer(source):
378 |         prefix_raw = controller_match.group("arg") or ""
379 |         prefix = _clean_path_literal(prefix_raw) or "/"
380 |         search_start = controller_match.end()
381 |         class_match = class_re.search(source, search_start)
382 |         if not class_match:
383 |             continue
384 |         brace_start = source.find("{", class_match.start())
385 |         if brace_start == -1:
386 |             continue
387 |         brace_end = _find_matching_brace(source, brace_start)
388 |         if brace_end == -1:
389 |             continue
390 |         body = source[brace_start:brace_end]
391 |         base_line = source.count("\n", 0, brace_start) + 1
392 | 
393 |         for method_match in method_re.finditer(body):
394 |             method_http = method_match.group(1).upper()
395 |             path_raw = method_match.group("arg") or ""
396 |             cleaned_path = _clean_path_literal(path_raw) or "/"
397 |             route = _combine_paths(prefix, cleaned_path)
398 |             start_line = base_line + body.count("\n", 0, method_match.start())
399 |             end_line = start_line
400 |             endpoints.append({
401 |                 "type": "function",
402 |                 "method": method_http,
403 |                 "route": route,
404 |                 "start_line": start_line,
405 |                 "end_line": end_line,
406 |                 "file_path": str(file_path)
407 |             })
408 |     return endpoints
409 | 
410 | 
411 | # Example usage
412 | if __name__ == "__main__":
413 |     test_file = Path("/Users/ankits/My-Favourite-Playlist/server.js")  # path to Node.js file
414 |     results = find_api_endpoints_js(test_file)
415 |     print(json.dumps(results, indent=2))
416 | 


--------------------------------------------------------------------------------
/golang_pipeline/run_swagger_generation.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | import re
  4 | import shutil
  5 | import tempfile
  6 | import datetime
  7 | from concurrent.futures import ThreadPoolExecutor, as_completed
  8 | from pathlib import Path
  9 | from typing import Dict, List, Optional, Tuple
 10 | 
 11 | from config import Configurations
 12 | from golang_pipeline.definition_swagger_generator import (
 13 |     get_function_definition_swagger,
 14 | )
 15 | from golang_pipeline.find_api_definition_files import find_api_definition_files
 16 | from golang_pipeline.generate_file_information import process_file
 17 | from golang_pipeline.identify_api_functions import find_api_endpoints
 18 | from utils import get_git_commit_hash, get_github_repo_url, get_repo_path, get_repo_name
 19 | 
 20 | config = Configurations()
 21 | 
 22 | _FUNCTION_INDEX_CACHE: Dict[str, List[Dict[str, object]]] = {}
 23 | _FUNCTION_INDEX_CACHE_ROOT: Optional[str] = None
 24 | _FILE_CONTENT_CACHE: Dict[str, List[str]] = {}
 25 | _METADATA_DIR: Optional[str] = None
 26 | _HEADER_PATTERN = re.compile(
 27 |     r"""\.Get(?:String|Header)\(\s*["']([^"']+)["']\s*\)|
 28 |         Header\.Get\(\s*["']([^"']+)["']\s*\)""",
 29 |     re.VERBOSE,
 30 | )
 31 | 
 32 | 
 33 | def should_process_directory(dir_path: str) -> bool:
 34 |     path_parts = dir_path.split(os.sep)
 35 |     return not any(part in config.ignored_dirs for part in path_parts)
 36 | 
 37 | 
 38 | def _sanitize_json_filename(file_path: str) -> str:
 39 |     return f"{file_path.replace(os.sep, '_q_')}.json"
 40 | 
 41 | 
 42 | def _ensure_function_index(directory_path: str) -> Dict[str, List[Dict[str, object]]]:
 43 |     global _FUNCTION_INDEX_CACHE
 44 |     global _FUNCTION_INDEX_CACHE_ROOT
 45 | 
 46 |     if _FUNCTION_INDEX_CACHE and _FUNCTION_INDEX_CACHE_ROOT == directory_path:
 47 |         return _FUNCTION_INDEX_CACHE
 48 | 
 49 |     _FUNCTION_INDEX_CACHE = {}
 50 |     _FUNCTION_INDEX_CACHE_ROOT = directory_path
 51 |     metadata_dir = _METADATA_DIR
 52 |     if not metadata_dir or not os.path.exists(metadata_dir):
 53 |         return _FUNCTION_INDEX_CACHE
 54 | 
 55 |     for entry in os.scandir(metadata_dir):
 56 |         if not entry.is_file() or not entry.name.endswith(".json"):
 57 |             continue
 58 |         try:
 59 |             with open(entry.path, "r", encoding="utf-8") as handle:
 60 |                 data = json.load(handle)
 61 |         except (OSError, json.JSONDecodeError):
 62 |             continue
 63 |         elements = data.get("elements", {})
 64 |         functions = elements.get("functions", [])
 65 |         for func in functions:
 66 |             name = func.get("name")
 67 |             start_line = func.get("start_line")
 68 |             end_line = func.get("end_line")
 69 |             file_name = data.get("filename") or func.get("file_path")
 70 |             if (
 71 |                 not name
 72 |                 or not isinstance(start_line, int)
 73 |                 or not isinstance(end_line, int)
 74 |                 or not file_name
 75 |             ):
 76 |                 continue
 77 |             _FUNCTION_INDEX_CACHE.setdefault(name, []).append(
 78 |                 {
 79 |                     "file_path": file_name,
 80 |                     "start_line": start_line,
 81 |                     "end_line": end_line,
 82 |                 }
 83 |             )
 84 |     return _FUNCTION_INDEX_CACHE
 85 | 
 86 | 
 87 | def _find_function_definition(
 88 |     directory_path: str,
 89 |     function_name: str,
 90 |     preferred_file: Optional[str] = None,
 91 |     route_file: Optional[str] = None,
 92 | ) -> Optional[Dict[str, object]]:
 93 |     index = _ensure_function_index(directory_path)
 94 |     entries = index.get(function_name, [])
 95 |     if not entries:
 96 |         return None
 97 |     if preferred_file:
 98 |         for entry in entries:
 99 |             if entry.get("file_path") == preferred_file:
100 |                 return entry
101 |     if route_file:
102 |         route_path = Path(route_file)
103 |         route_stem = route_path.stem
104 |         tokens: List[str] = []
105 |         if route_stem.endswith("_route"):
106 |             tokens.append(route_stem[: -len("_route")])
107 |             tokens.append(route_stem[: -len("_route")] + "_controller")
108 |         tokens.append(route_stem.replace("route", "controller"))
109 |         best_entry = None
110 |         best_score = -1
111 |         for entry in entries:
112 |             score = 0
113 |             file_path = entry.get("file_path") or ""
114 |             if "controller" in file_path:
115 |                 score += 5
116 |             for token in tokens:
117 |                 if token and token in file_path:
118 |                     score += 10
119 |             if score > best_score:
120 |                 best_score = score
121 |                 best_entry = entry
122 |         if best_entry:
123 |             return best_entry
124 |     return entries[0]
125 | 
126 | 
127 | def _hydrate_method_info(
128 |     directory_path: str, method_info: Dict[str, object]
129 | ) -> Optional[Dict[str, object]]:
130 |     if method_info.get("start_line") and method_info.get("end_line") and method_info.get("file_path" ):
131 |         return method_info
132 | 
133 |     handler_name = method_info.get("handler_name") or method_info.get("name")
134 |     if not handler_name:
135 |         return None
136 | 
137 |     preferred_file = method_info.get("file_path")
138 |     definition = _find_function_definition(
139 |         directory_path, handler_name, preferred_file, method_info.get("route_file")
140 |     )
141 |     if not definition:
142 |         return None
143 | 
144 |     method_info = method_info.copy()
145 |     method_info["file_path"] = definition.get("file_path")
146 |     method_info["start_line"] = definition.get("start_line")
147 |     method_info["end_line"] = definition.get("end_line")
148 |     return method_info
149 | 
150 | 
151 | def _read_file_lines(file_path: str) -> Optional[List[str]]:
152 |     cached = _FILE_CONTENT_CACHE.get(file_path)
153 |     if cached is not None:
154 |         return cached
155 |     try:
156 |         with open(file_path, "r", encoding="utf-8") as handle:
157 |             lines = handle.readlines()
158 |     except OSError:
159 |         return None
160 |     _FILE_CONTENT_CACHE[file_path] = lines
161 |     return lines
162 | 
163 | 
164 | def get_dependencies(
165 |     data: Dict, start_line: int, end_line: int, file_path: str
166 | ) -> Tuple[List[Dict], List[Dict]]:
167 |     existing_function_names = [
168 |         item.get("name") for item in data.get("elements", {}).get("functions", [])
169 |     ]
170 |     in_file_dependency_functions: List[Dict] = []
171 |     for call in data.get("elements", {}).get("function_calls", []):
172 |         call_name = call.get("name")
173 |         call_start = call.get("start_line")
174 |         call_end = call.get("end_line")
175 |         if (
176 |             call_name in existing_function_names
177 |             and isinstance(call_start, int)
178 |             and isinstance(call_end, int)
179 |             and start_line <= call_start <= end_line
180 |         ):
181 |             entry = call.copy()
182 |             entry["file_path"] = file_path
183 |             in_file_dependency_functions.append(entry)
184 | 
185 |     imported_functions: List[Dict] = []
186 |     for item in data.get("imports", []):
187 |         usage_lines = item.get("usage_lines", [])
188 |         if not usage_lines:
189 |             continue
190 |         for usage in usage_lines:
191 |             if start_line <= usage <= end_line:
192 |                 imported_functions.append(item)
193 |                 break
194 |     return in_file_dependency_functions, imported_functions
195 | 
196 | 
197 | def get_code_blocks(
198 |     in_file_dependency_functions: List[Dict],
199 |     imported_functions: List[Dict],
200 |     file_name: str,
201 |     directory_path: str,
202 | ) -> List[List[str]]:
203 |     code_blocks: List[List[str]] = []
204 |     lines = _read_file_lines(file_name) or []
205 |     for block in in_file_dependency_functions:
206 |         start = block.get("function_start_line") or block.get("start_line")
207 |         end = block.get("function_end_line") or block.get("end_line")
208 |         if not isinstance(start, int) or not isinstance(end, int):
209 |             continue
210 |         segment = lines[start - 1 : end]
211 |         if segment:
212 |             code_blocks.append(segment)
213 | 
214 |     metadata_dir = _METADATA_DIR
215 |     if not metadata_dir:
216 |         return code_blocks
217 |     for imp in imported_functions:
218 |         origin = imp.get("origin")
219 |         if not origin:
220 |             continue
221 |         if os.path.isdir(origin):
222 |             candidates = [
223 |                 os.path.join(origin, name)
224 |                 for name in os.listdir(origin)
225 |                 if name.endswith(".go")
226 |             ]
227 |         else:
228 |             candidates = [origin] if origin.endswith(".go") else []
229 |         for candidate in candidates:
230 |             json_file = os.path.join(
231 |                 metadata_dir, _sanitize_json_filename(candidate)
232 |             )
233 |             if not os.path.exists(json_file):
234 |                 continue
235 |             try:
236 |                 with open(json_file, "r", encoding="utf-8") as handle:
237 |                     data = json.load(handle)
238 |             except (OSError, json.JSONDecodeError):
239 |                 continue
240 |             elements = data.get("elements", {})
241 |             for func in elements.get("functions", []):
242 |                 if func.get("name") == imp.get("imported_name"):
243 |                     origin_lines = _read_file_lines(candidate) or []
244 |                     snippet = origin_lines[
245 |                         func.get("start_line", 1) - 1 : func.get("end_line", 1)
246 |                     ]
247 |                     if snippet:
248 |                         code_blocks.append(snippet)
249 |                     break
250 |     return code_blocks
251 | 
252 | 
253 | def _extract_header_names(method_lines: List[str]) -> List[str]:
254 |     text = "".join(method_lines)
255 |     headers: List[str] = []
256 |     for match in _HEADER_PATTERN.finditer(text):
257 |         name = match.group(1) or match.group(2)
258 |         if name:
259 |             headers.append(name)
260 |     return sorted(set(headers))
261 | 
262 | 
263 | def _build_header_hint_block(method_lines: List[str]) -> Optional[List[str]]:
264 |     header_names = _extract_header_names(method_lines)
265 |     if not header_names:
266 |         return None
267 |     block = [
268 |         "# Request headers referenced directly in this handler.\n",
269 |         "# Document each of these headers as required parameters when applicable.\n",
270 |     ]
271 |     for name in header_names:
272 |         block.append(f"# header: {name}\n")
273 |     return block
274 | 
275 | 
276 | def _load_types_from_origin(
277 |     origin: str, alias: Optional[str], per_alias_limit: int
278 | ) -> List[List[str]]:
279 |     metadata_dir = _METADATA_DIR
280 |     if not metadata_dir:
281 |         return []
282 |     file_candidates: List[str] = []
283 |     if os.path.isdir(origin):
284 |         for entry in os.scandir(origin):
285 |             if entry.is_file() and entry.name.endswith(".go"):
286 |                 file_candidates.append(entry.path)
287 |     elif origin.endswith(".go"):
288 |         file_candidates.append(origin)
289 |     blocks: List[List[str]] = []
290 |     collected = 0
291 |     for candidate in file_candidates:
292 |         json_file = os.path.join(metadata_dir, _sanitize_json_filename(candidate))
293 |         if not os.path.exists(json_file):
294 |             continue
295 |         try:
296 |             with open(json_file, "r", encoding="utf-8") as handle:
297 |                 data = json.load(handle)
298 |         except (OSError, json.JSONDecodeError):
299 |             continue
300 |         type_entries = data.get("elements", {}).get("types", [])
301 |         if not type_entries:
302 |             continue
303 |         for type_entry in type_entries:
304 |             start = type_entry.get("start_line")
305 |             end = type_entry.get("end_line")
306 |             if not isinstance(start, int) or not isinstance(end, int):
307 |                 continue
308 |             lines = _read_file_lines(candidate)
309 |             if lines is None:
310 |                 continue
311 |             qualifier = f"{alias}." if alias else ""
312 |             header = [f"# Type {qualifier}{type_entry.get('name')} from {candidate}\n"]
313 |             blocks.append(header + lines[start - 1 : end])
314 |             collected += 1
315 |             if per_alias_limit and collected >= per_alias_limit:
316 |                 return blocks
317 |     return blocks
318 | 
319 | 
320 | def _collect_import_type_blocks(
321 |     imports: List[Dict], per_alias_limit: int = 3
322 | ) -> List[List[str]]:
323 |     if not imports:
324 |         return []
325 |     blocks: List[List[str]] = []
326 |     seen: set = set()
327 |     for import_entry in imports:
328 |         if not import_entry.get("path_exists"):
329 |             continue
330 |         origin = import_entry.get("origin")
331 |         if not origin:
332 |             continue
333 |         alias = import_entry.get("alias") or import_entry.get("imported_name")
334 |         key = (alias, origin)
335 |         if key in seen:
336 |             continue
337 |         seen.add(key)
338 |         type_blocks = _load_types_from_origin(origin, alias, per_alias_limit)
339 |         blocks.extend(type_blocks)
340 |     return blocks
341 | 
342 | 
343 | 
344 | 
345 | def provide_context_codeblock(directory_path: str, method_info: Dict):
346 |     file_name = method_info["file_path"]
347 |     lines = _read_file_lines(file_name) or []
348 |     start_line = method_info.get("start_line", 1)
349 |     end_line = method_info.get("end_line", start_line)
350 |     method_definition_code_block = lines[start_line - 1 : end_line]
351 | 
352 |     metadata_dir = _METADATA_DIR
353 |     data = {"elements": {"functions": [], "function_calls": []}, "imports": []}
354 |     if metadata_dir:
355 |         json_file = os.path.join(metadata_dir, _sanitize_json_filename(file_name))
356 |         try:
357 |             with open(json_file, "r", encoding="utf-8") as handle:
358 |                 data = json.load(handle)
359 |         except (OSError, json.JSONDecodeError):
360 |             data = {"elements": {"functions": [], "function_calls": []}, "imports": []}
361 | 
362 |     in_file_dependency_functions, imported_functions = get_dependencies(
363 |         data, start_line, end_line, file_name
364 |     )
365 |     context_code_blocks = get_code_blocks(
366 |         in_file_dependency_functions, imported_functions, file_name, directory_path
367 |     )
368 |     header_block = _build_header_hint_block(method_definition_code_block)
369 |     type_blocks = _collect_import_type_blocks(data.get("imports", []))
370 |     prefix_blocks: List[List[str]] = []
371 |     if header_block:
372 |         prefix_blocks.append(header_block)
373 |     prefix_blocks.extend(type_blocks)
374 |     context_code_blocks = prefix_blocks + context_code_blocks
375 |     return context_code_blocks, method_definition_code_block
376 | 
377 | 
378 | def run_swagger_generation(host: str) -> Dict:
379 |     directory_path = get_repo_path()
380 |     repo_name = get_repo_name()
381 |     global _METADATA_DIR
382 |     metadata_dir = tempfile.mkdtemp(prefix="qodex_go_file_info_")
383 |     _METADATA_DIR = metadata_dir
384 | 
385 |     try:
386 |         for root, _, files in os.walk(directory_path):
387 |             for filename in files:
388 |                 file_path = os.path.join(root, filename)
389 |                 if (
390 |                     os.path.exists(file_path)
391 |                     and should_process_directory(file_path)
392 |                     and file_path.endswith(".go")
393 |                 ):
394 |                     try:
395 |                         file_info = process_file(file_path, directory_path)
396 |                     except Exception:
397 |                         continue
398 |                     json_file_name = os.path.join(
399 |                         metadata_dir, _sanitize_json_filename(file_path)
400 |                     )
401 |                     with open(json_file_name, "w", encoding="utf-8") as handle:
402 |                         json.dump(file_info, handle, indent=4)
403 | 
404 |         api_files = find_api_definition_files(directory_path)
405 |         endpoints: List[Dict] = []
406 |         for file in api_files:
407 |             endpoints.extend(find_api_endpoints(Path(file), directory_path))
408 | 
409 |         swagger = {
410 |             "openapi": "3.0.0",
411 |             "info": {
412 |                 "title": repo_name,
413 |                 "version": "1.0.0",
414 |                 "description": "This Swagger file was generated using OpenAI GPT.",
415 |                 "generated_at": datetime.datetime.utcnow().isoformat() + "Z",
416 |                 "commit_reference": get_git_commit_hash(),
417 |                 "github_repo_url": get_github_repo_url(),
418 |             },
419 |             "servers": [{"url": host}],
420 |             "paths": {},
421 |         }
422 | 
423 |         endpoint_jobs: List[Dict] = []
424 |         for endpoint in endpoints:
425 |             hydrated = _hydrate_method_info(directory_path, endpoint)
426 |             if not hydrated:
427 |                 continue
428 |             endpoint_jobs.append(hydrated)
429 | 
430 |         if not endpoint_jobs:
431 |             return swagger
432 | 
433 |         def _generate_swagger_fragment(method_info: Dict) -> Dict:
434 |             context_blocks, method_definition = provide_context_codeblock(
435 |                 directory_path, method_info
436 |             )
437 |             http_method = method_info.get("http_method") or "GET"
438 |             if http_method:
439 |                 context_blocks = [[f"HTTP_METHOD: {http_method}\\n"]] + context_blocks
440 |             handler_metadata = method_info.get("handler_selector") or method_info.get("name")
441 |             if handler_metadata:
442 |                 context_blocks = [[f"HANDLER: {handler_metadata}\\n"]] + context_blocks
443 |             return get_function_definition_swagger(
444 |                 method_definition, context_blocks, method_info["route"], http_method
445 |             )
446 | 
447 |         with ThreadPoolExecutor(max_workers=5) as executor:
448 |             futures = [
449 |                 executor.submit(_generate_swagger_fragment, job)
450 |                 for job in endpoint_jobs
451 |             ]
452 |             for future in as_completed(futures):
453 |                 swagger_fragment = future.result()
454 |                 for path_key, methods in swagger_fragment.get("paths", {}).items():
455 |                     swagger.setdefault("paths", {}).setdefault(path_key, {})
456 |                     for method, payload in methods.items():
457 |                         swagger["paths"][path_key][method] = payload
458 | 
459 |         return swagger
460 |     finally:
461 |         if metadata_dir and os.path.exists(metadata_dir):
462 |             shutil.rmtree(metadata_dir, ignore_errors=True)
463 |         _METADATA_DIR = None
464 | 


--------------------------------------------------------------------------------